diff options
Diffstat (limited to 'studiorender')
| -rw-r--r-- | studiorender/flexrenderdata.cpp | 238 | ||||
| -rw-r--r-- | studiorender/flexrenderdata.h | 335 | ||||
| -rw-r--r-- | studiorender/ihvtestcopy.pl | 13 | ||||
| -rw-r--r-- | studiorender/r_studio.cpp | 392 | ||||
| -rw-r--r-- | studiorender/r_studiodecal.cpp | 1990 | ||||
| -rw-r--r-- | studiorender/r_studiodraw.cpp | 2986 | ||||
| -rw-r--r-- | studiorender/r_studiodraw_computeflexedvertex.cpp | 1621 | ||||
| -rw-r--r-- | studiorender/r_studioflex.cpp | 928 | ||||
| -rw-r--r-- | studiorender/r_studiogettriangles.cpp | 166 | ||||
| -rw-r--r-- | studiorender/r_studiolight.cpp | 542 | ||||
| -rw-r--r-- | studiorender/r_studiolight.h | 49 | ||||
| -rw-r--r-- | studiorender/r_studiostats.cpp | 389 | ||||
| -rw-r--r-- | studiorender/studiorender.cpp | 762 | ||||
| -rw-r--r-- | studiorender/studiorender.h | 931 | ||||
| -rw-r--r-- | studiorender/studiorender.vpc | 114 | ||||
| -rw-r--r-- | studiorender/studiorendercontext.cpp | 2454 | ||||
| -rw-r--r-- | studiorender/studiorendercontext.h | 246 | ||||
| -rw-r--r-- | studiorender/xbox/xbox.def | 3 |
18 files changed, 14159 insertions, 0 deletions
diff --git a/studiorender/flexrenderdata.cpp b/studiorender/flexrenderdata.cpp new file mode 100644 index 0000000..5162bd8 --- /dev/null +++ b/studiorender/flexrenderdata.cpp @@ -0,0 +1,238 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +//=============================================================================// + +#include "flexrenderdata.h" + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + +//----------------------------------------------------------------------------- +// Constructor +//----------------------------------------------------------------------------- + +CCachedRenderData::CCachedRenderData() : m_CurrentTag(0), m_pFirstFlexIndex(0), + m_pFirstWorldIndex(0) +{ +#ifdef _DEBUG + int i; + float val = VEC_T_NAN; + for( i = 0; i < MAXSTUDIOFLEXVERTS; i++ ) + { + m_pFlexVerts[i].m_Position[0] = val; + m_pFlexVerts[i].m_Position[1] = val; + m_pFlexVerts[i].m_Position[2] = val; + m_pFlexVerts[i].m_Normal[0] = val; + m_pFlexVerts[i].m_Normal[1] = val; + m_pFlexVerts[i].m_Normal[2] = val; + + m_pThinFlexVerts[i].m_Position[0] = val; + m_pThinFlexVerts[i].m_Position[1] = val; + m_pThinFlexVerts[i].m_Position[2] = val; + m_pThinFlexVerts[i].m_Normal[0] = val; + m_pThinFlexVerts[i].m_Normal[1] = val; + m_pThinFlexVerts[i].m_Normal[2] = val; + + m_pFlexVerts[i].m_TangentS[0] = val; + m_pFlexVerts[i].m_TangentS[1] = val; + m_pFlexVerts[i].m_TangentS[2] = val; + m_pFlexVerts[i].m_TangentS[3] = val; + } +#endif +} + +//----------------------------------------------------------------------------- +// Call this before rendering the model +//----------------------------------------------------------------------------- + +void CCachedRenderData::StartModel() +{ + ++m_CurrentTag; + m_IndexCount = 0; + m_FlexVertexCount = 0; + m_ThinFlexVertexCount = 0; + m_WorldVertexCount = 0; + m_pFirstFlexIndex = 0; + m_pFirstThinFlexIndex = 0; + m_pFirstWorldIndex 
= 0; +} + +//----------------------------------------------------------------------------- +// Used to hook ourselves into a particular body part, model, and mesh +//----------------------------------------------------------------------------- + +void CCachedRenderData::SetBodyPart( int bodypart ) +{ + m_Body = bodypart; + m_CacheDict.EnsureCount(m_Body+1); + m_Model = m_Mesh = -1; + m_pFirstFlexIndex = 0; + m_pFirstThinFlexIndex = 0; + m_pFirstWorldIndex = 0; +} + +void CCachedRenderData::SetModel( int model ) +{ + Assert(m_Body >= 0); + m_Model = model; + m_CacheDict[m_Body].EnsureCount(m_Model+1); + m_Mesh = -1; + m_pFirstFlexIndex = 0; + m_pFirstThinFlexIndex = 0; + m_pFirstWorldIndex = 0; +} + +void CCachedRenderData::SetMesh( int mesh ) +{ + Assert((m_Model >= 0) && (m_Body >= 0)); + + m_Mesh = mesh; + m_CacheDict[m_Body][m_Model].EnsureCount(m_Mesh+1); + + // At this point, we should have all 3 defined. + CacheDict_t& dict = m_CacheDict[m_Body][m_Model][m_Mesh]; + + if (dict.m_Tag == m_CurrentTag) + { + m_pFirstFlexIndex = &m_pFlexIndex[dict.m_FirstIndex]; + m_pFirstThinFlexIndex = &m_pThinFlexIndex[dict.m_FirstIndex]; + m_pFirstWorldIndex = &m_pWorldIndex[dict.m_FirstIndex]; + } + else + { + m_pFirstFlexIndex = 0; + m_pFirstThinFlexIndex = 0; + m_pFirstWorldIndex = 0; + } +} + + +//----------------------------------------------------------------------------- +// Used to set up a flex computation +//----------------------------------------------------------------------------- + +bool CCachedRenderData::IsFlexComputationDone( ) const +{ + Assert((m_Model >= 0) && (m_Body >= 0) && (m_Mesh >= 0)); + + // Lets create the dictionary entry + // If the tags match, that means we're doing the computation twice!!! 
+ CacheDict_t const& dict = m_CacheDict[m_Body][m_Model][m_Mesh]; + return (dict.m_FlexTag == m_CurrentTag); +} + +//----------------------------------------------------------------------------- +// Used to set up a computation (modifies vertex data) +//----------------------------------------------------------------------------- + +void CCachedRenderData::SetupComputation( mstudiomesh_t *pMesh, bool flexComputation ) +{ + Assert((m_Model >= 0) && (m_Body >= 0) && (m_Mesh >= 0)); +// Assert( !m_pFirstIndex ); + + // Lets create the dictionary entry + // If the tags match, that means we're doing the computation twice!!! + CacheDict_t& dict = m_CacheDict[m_Body][m_Model][m_Mesh]; + if (dict.m_Tag != m_CurrentTag) + { + dict.m_FirstIndex = m_IndexCount; + dict.m_IndexCount = pMesh->numvertices; + dict.m_Tag = m_CurrentTag; + m_IndexCount += dict.m_IndexCount; + } + + if (flexComputation) + dict.m_FlexTag = m_CurrentTag; + + m_pFirstFlexIndex = &m_pFlexIndex[dict.m_FirstIndex]; + m_pFirstThinFlexIndex = &m_pThinFlexIndex[dict.m_FirstIndex]; + m_pFirstWorldIndex = &m_pWorldIndex[dict.m_FirstIndex]; +} + +//----------------------------------------------------------------------------- +// Creates a new flexed vertex to be associated with a vertex +//----------------------------------------------------------------------------- + +CachedPosNormTan_t* CCachedRenderData::CreateFlexVertex( int vertex ) +{ + Assert( m_pFirstFlexIndex ); + Assert( m_pFirstFlexIndex[vertex].m_Tag != m_CurrentTag ); + + Assert ( m_FlexVertexCount < MAXSTUDIOFLEXVERTS ); + if ( m_FlexVertexCount >= MAXSTUDIOFLEXVERTS ) + return NULL; + + // Point the flex list to the new flexed vertex + m_pFirstFlexIndex[vertex].m_Tag = m_CurrentTag; + m_pFirstFlexIndex[vertex].m_VertexIndex = m_FlexVertexCount; + + // Add a new flexed vert to the flexed vertex list + ++m_FlexVertexCount; + + return GetFlexVertex( vertex ); +} + +//----------------------------------------------------------------------------- +// 
Creates a new flexed vertex to be associated with a vertex +//----------------------------------------------------------------------------- + +CachedPosNorm_t* CCachedRenderData::CreateThinFlexVertex( int vertex ) +{ + Assert( m_pFirstThinFlexIndex ); + Assert( m_pFirstThinFlexIndex[vertex].m_Tag != m_CurrentTag ); + + Assert ( m_ThinFlexVertexCount < MAXSTUDIOFLEXVERTS ); + if ( m_ThinFlexVertexCount >= MAXSTUDIOFLEXVERTS ) + return NULL; + + // Point the flex list to the new flexed vertex + m_pFirstThinFlexIndex[vertex].m_Tag = m_CurrentTag; + m_pFirstThinFlexIndex[vertex].m_VertexIndex = m_ThinFlexVertexCount; + + // Add a new flexed vert to the thin flexed vertex list + ++m_ThinFlexVertexCount; + + return GetThinFlexVertex( vertex ); +} + +//----------------------------------------------------------------------------- +// Re-normalize the surface normals and tangents of the flexed vertices +// No thin ones since they're intended to be deltas, not unit vectors +//----------------------------------------------------------------------------- +void CCachedRenderData::RenormalizeFlexVertices( bool bHasTangentData ) +{ + int i; + + for (i = 0; i < m_FlexVertexCount; i++) + { + m_pFlexVerts[ i ].m_Normal.NormalizeInPlace(); + if (bHasTangentData) + { + m_pFlexVerts[ i ].m_TangentS.AsVector3D().NormalizeInPlace(); + } + } +} + +//----------------------------------------------------------------------------- +// Creates a new flexed vertex to be associated with a vertex +//----------------------------------------------------------------------------- + +CachedPosNorm_t* CCachedRenderData::CreateWorldVertex( int vertex ) +{ + Assert( m_pFirstWorldIndex ); + if ( m_pFirstWorldIndex[vertex].m_Tag != m_CurrentTag ) + { + // Point the world list to the new world vertex + Assert( m_WorldVertexCount < MAXSTUDIOVERTS ); + m_pFirstWorldIndex[vertex].m_Tag = m_CurrentTag; + m_pFirstWorldIndex[vertex].m_VertexIndex = m_WorldVertexCount; + + // Add a new world vert to the world 
vertex list + ++m_WorldVertexCount; + } + return GetWorldVertex( vertex ); +} + diff --git a/studiorender/flexrenderdata.h b/studiorender/flexrenderdata.h new file mode 100644 index 0000000..ac43f53 --- /dev/null +++ b/studiorender/flexrenderdata.h @@ -0,0 +1,335 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +//=============================================================================// + +#ifndef FLEXRENDERDATA_H +#define FLEXRENDERDATA_H +#ifdef _WIN32 +#pragma once +#endif + +#include "mathlib/vector.h" +#include "utlvector.h" +#include "studio.h" + +//----------------------------------------------------------------------------- +// forward declarations +//----------------------------------------------------------------------------- + +struct mstudiomesh_t; + +//----------------------------------------------------------------------------- +// Used by flex vertex data cache +//----------------------------------------------------------------------------- + +struct CachedPosNormTan_t +{ + Vector m_Position; + Vector m_Normal; + Vector4D m_TangentS; + + CachedPosNormTan_t() {} + + CachedPosNormTan_t( CachedPosNormTan_t const& src ) + { + VectorCopy( src.m_Position, m_Position ); + VectorCopy( src.m_Normal, m_Normal ); + Vector4DCopy( src.m_TangentS, m_TangentS ); + Assert( m_TangentS.w == 1.0f || m_TangentS.w == -1.0f ); + } +}; + +//----------------------------------------------------------------------------- +// Used by world (decal) vertex data cache +//----------------------------------------------------------------------------- + +struct CachedPosNorm_t +{ + Vector4DAligned m_Position; + Vector4DAligned m_Normal; + + CachedPosNorm_t() {} + + CachedPosNorm_t( CachedPosNorm_t const& src ) + { + Vector4DCopy( src.m_Position, m_Position ); + Vector4DCopy( src.m_Normal, m_Normal ); + } +}; + + +//----------------------------------------------------------------------------- +// Stores flex 
vertex data and world (decal) vertex data for the lifetime of the model rendering +//----------------------------------------------------------------------------- + + +class CCachedRenderData +{ +public: + // Constructor + CCachedRenderData(); + + // Call this when we start to render a new model + void StartModel(); + + // Used to hook ourselves into a particular body part, model, and mesh + void SetBodyPart( int bodypart ); + void SetModel( int model ); + void SetMesh( int mesh ); + + // For faster setup in the decal code + void SetBodyModelMesh( int body, int model, int mesh ); + + // Used to set up a flex computation + bool IsFlexComputationDone( ) const; + + // Used to set up a computation (for world or flex data) + void SetupComputation( mstudiomesh_t *pMesh, bool flexComputation = false ); + + // Is a particular vertex flexed? + bool IsVertexFlexed( int vertex ) const; + bool IsThinVertexFlexed( int vertex ) const; + + // Checks to see if the vertex is defined + bool IsVertexPositionCached( int vertex ) const; + + // Gets a flexed vertex + CachedPosNormTan_t* GetFlexVertex( int vertex ); + + // Gets a flexed vertex + CachedPosNorm_t* GetThinFlexVertex( int vertex ); + + // Creates a new flexed vertex to be associated with a vertex + CachedPosNormTan_t* CreateFlexVertex( int vertex ); + + // Creates a new flexed vertex to be associated with a vertex + CachedPosNorm_t* CreateThinFlexVertex( int vertex ); + + // Renormalizes the normals and tangents of the flex verts + void RenormalizeFlexVertices( bool bHasTangentData ); + + // Gets a decal vertex + CachedPosNorm_t* GetWorldVertex( int vertex ); + + // Creates a new decal vertex to be associated with a vertex + CachedPosNorm_t* CreateWorldVertex( int vertex ); + + template< class T > + void ComputeFlexedVertex_StreamOffset( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, T *pvanim, int vertCount, float w1, float w2, float w3, float w4 ); + +#ifdef PLATFORM_WINDOWS + void 
ComputeFlexedVertex_StreamOffset_Optimized( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, mstudiovertanim_t *pvanim, int vertCount, float w1, float w2, float w3, float w4); + void ComputeFlexedVertexWrinkle_StreamOffset_Optimized( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, mstudiovertanim_wrinkle_t *pvanim, int vertCount, float w1, float w2, float w3, float w4); +#endif // PLATFORM_WINDOWS + +private: + // Used to create the flex render data. maps + struct CacheIndex_t + { + unsigned short m_Tag; + unsigned short m_VertexIndex; + }; + + // A dictionary for the cached data + struct CacheDict_t + { + unsigned short m_FirstIndex; + unsigned short m_IndexCount; + unsigned short m_Tag; + unsigned short m_FlexTag; + + CacheDict_t() : m_Tag(0), m_FlexTag(0) {} + }; + + typedef CUtlVector< CacheDict_t > CacheMeshDict_t; + typedef CUtlVector< CacheMeshDict_t > CacheModelDict_t; + typedef CUtlVector< CacheModelDict_t > CacheBodyPartDict_t; + + // Flex data, allocated for the lifespan of rendering + // Can't use UtlVector due to alignment issues + int m_FlexVertexCount; + CachedPosNormTan_t m_pFlexVerts[MAXSTUDIOFLEXVERTS+1]; + + // Flex data, allocated for the lifespan of rendering + // Can't use UtlVector due to alignment issues + int m_ThinFlexVertexCount; + CachedPosNorm_t m_pThinFlexVerts[MAXSTUDIOFLEXVERTS+1]; + + // World data, allocated for the lifespan of rendering + // Can't use UtlVector due to alignment issues + int m_WorldVertexCount; + CachedPosNorm_t m_pWorldVerts[MAXSTUDIOVERTS+1]; + + // Maps actual mesh vertices into flex cache + world cache indices + int m_IndexCount; + CacheIndex_t m_pFlexIndex[MAXSTUDIOVERTS+1]; + CacheIndex_t m_pThinFlexIndex[MAXSTUDIOVERTS+1]; + CacheIndex_t m_pWorldIndex[MAXSTUDIOVERTS+1]; + + CacheBodyPartDict_t m_CacheDict; + + // The flex tag + unsigned short m_CurrentTag; + + // the current body, model, and mesh + int m_Body; + int m_Model; + int m_Mesh; + + // mapping for the current mesh to flex data + CacheIndex_t* 
m_pFirstFlexIndex; + CacheIndex_t* m_pFirstThinFlexIndex; + CacheIndex_t* m_pFirstWorldIndex; + + friend class CStudioRender; +}; + + +//----------------------------------------------------------------------------- +// Checks to see if the vertex is defined +//----------------------------------------------------------------------------- + +inline bool CCachedRenderData::IsVertexFlexed( int vertex ) const +{ + return (m_pFirstFlexIndex && (m_pFirstFlexIndex[vertex].m_Tag == m_CurrentTag)); +} + +inline bool CCachedRenderData::IsThinVertexFlexed( int vertex ) const +{ + return (m_pFirstThinFlexIndex && (m_pFirstThinFlexIndex[vertex].m_Tag == m_CurrentTag)); +} + +//----------------------------------------------------------------------------- +// Gets an existing flexed vertex associated with a vertex +//----------------------------------------------------------------------------- + +inline CachedPosNormTan_t* CCachedRenderData::GetFlexVertex( int vertex ) +{ + Assert( m_pFirstFlexIndex ); + Assert( m_pFirstFlexIndex[vertex].m_Tag == m_CurrentTag ); + return &m_pFlexVerts[ m_pFirstFlexIndex[vertex].m_VertexIndex ]; +} + +inline CachedPosNorm_t* CCachedRenderData::GetThinFlexVertex( int vertex ) +{ + Assert( m_pFirstThinFlexIndex ); + Assert( m_pFirstThinFlexIndex[vertex].m_Tag == m_CurrentTag ); + return &m_pThinFlexVerts[ m_pFirstThinFlexIndex[vertex].m_VertexIndex ]; +} + + + + +//----------------------------------------------------------------------------- +// Checks to see if the vertex is defined +//----------------------------------------------------------------------------- + +inline bool CCachedRenderData::IsVertexPositionCached( int vertex ) const +{ + return (m_pFirstWorldIndex && (m_pFirstWorldIndex[vertex].m_Tag == m_CurrentTag)); +} + +//----------------------------------------------------------------------------- +// Gets an existing world vertex associated with a vertex +//----------------------------------------------------------------------------- + 
+inline CachedPosNorm_t* CCachedRenderData::GetWorldVertex( int vertex ) +{ + Assert( m_pFirstWorldIndex ); + Assert( m_pFirstWorldIndex[vertex].m_Tag == m_CurrentTag ); + return &m_pWorldVerts[ m_pFirstWorldIndex[vertex].m_VertexIndex ]; +} + +//----------------------------------------------------------------------------- +// For faster setup in the decal code +//----------------------------------------------------------------------------- + +inline void CCachedRenderData::SetBodyModelMesh( int body, int model, int mesh) +{ + m_Body = body; + m_Model = model; + m_Mesh = mesh; + + Assert((m_Model >= 0) && (m_Body >= 0)); + m_CacheDict[m_Body][m_Model].EnsureCount(m_Mesh+1); + + // At this point, we should have all 3 defined. + CacheDict_t& dict = m_CacheDict[m_Body][m_Model][m_Mesh]; + + if (dict.m_Tag == m_CurrentTag) + { + m_pFirstFlexIndex = &m_pFlexIndex[dict.m_FirstIndex]; + m_pFirstThinFlexIndex = &m_pThinFlexIndex[dict.m_FirstIndex]; + m_pFirstWorldIndex = &m_pWorldIndex[dict.m_FirstIndex]; + } + else + { + m_pFirstFlexIndex = 0; + m_pFirstThinFlexIndex = 0; + m_pFirstWorldIndex = 0; + } +} + + +//----------------------------------------------------------------------------- +// Purpose: +// +// ** Only execute this function if device supports stream offset ** +// +// Input : pmesh - pointer to a studio mesh +// lod - integer lod (0 is most detailed) +// Output : none +//----------------------------------------------------------------------------- +template< class T > +void CCachedRenderData::ComputeFlexedVertex_StreamOffset( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, + T *pvanim, int vertCount, float w1, float w2, float w3, float w4 ) +{ + float w12 = w1 - w2; + float w34 = w3 - w4; + float flVertAnimFixedPointScale = pStudioHdr->VertAnimFixedPointScale(); + + CachedPosNorm_t *pFlexedVertex = NULL; + for (int j = 0; j < pflex->numverts; j++) + { + int n = pvanim[j].index; + + // only flex the indices that are (still) part of this mesh at this lod + if ( 
n >= vertCount ) + continue; + + float s = pvanim[j].speed; + float b = pvanim[j].side; + + Vector4DAligned vPosition, vNormal; + pvanim[j].GetDeltaFixed4DAligned( &vPosition, flVertAnimFixedPointScale ); + pvanim[j].GetNDeltaFixed4DAligned( &vNormal, flVertAnimFixedPointScale ); + + if ( !IsThinVertexFlexed(n) ) + { + // Add a new flexed vert to the flexed vertex list + pFlexedVertex = CreateThinFlexVertex(n); + + Assert( pFlexedVertex != NULL); + + pFlexedVertex->m_Position.InitZero(); + pFlexedVertex->m_Normal.InitZero(); + } + else + { + pFlexedVertex = GetThinFlexVertex(n); + } + + s *= 1.0f / 255.0f; + b *= 1.0f / 255.0f; + + float wa = w2 + w12 * s; + float wb = w4 + w34 * s; + float w = wa + ( wb - wa ) * b; + Vector4DWeightMAD( w, vPosition, pFlexedVertex->m_Position, vNormal, pFlexedVertex->m_Normal ); + } +} + +#endif // FLEXRENDERDATA_H diff --git a/studiorender/ihvtestcopy.pl b/studiorender/ihvtestcopy.pl new file mode 100644 index 0000000..d5c1224 --- /dev/null +++ b/studiorender/ihvtestcopy.pl @@ -0,0 +1,13 @@ +$infile = shift; +$outfile = shift; + +open INFILE, "<$infile"; +@infile = <INFILE>; +close INFILE; + +open OUTFILE, ">$outfile"; +while( shift @infile ) +{ + print OUTFILE $_; +} +close OUTFILE;
\ No newline at end of file diff --git a/studiorender/r_studio.cpp b/studiorender/r_studio.cpp new file mode 100644 index 0000000..51fb01c --- /dev/null +++ b/studiorender/r_studio.cpp @@ -0,0 +1,392 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// r_studio.cpp: routines for setting up to draw 3DStudio models +// +// $Workfile: $ +// $Date: $ +// $NoKeywords: $ +//===========================================================================// + + +#include "studio.h" +#include "studiorender.h" +#include "studiorendercontext.h" +#include "materialsystem/imaterial.h" +#include "materialsystem/imaterialvar.h" +#include "tier0/vprof.h" +#include "tier3/tier3.h" +#include "datacache/imdlcache.h" + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + +//----------------------------------------------------------------------------- +// Figures out what kind of lighting we're gonna want +//----------------------------------------------------------------------------- +FORCEINLINE StudioModelLighting_t CStudioRender::R_StudioComputeLighting( IMaterial *pMaterial, int materialFlags, ColorMeshInfo_t *pColorMeshes ) +{ + // Here, we only do software lighting when the following conditions are met. + // 1) The material is vertex lit and we don't have hardware lighting + // 2) We're drawing an eyeball + // 3) We're drawing mouth-lit stuff + + // FIXME: When we move software lighting into the material system, only need to + // test if it's vertex lit + + Assert( pMaterial ); + bool doMouthLighting = materialFlags && (m_pStudioHdr->nummouths >= 1); + + if ( IsX360() ) + { + // 360 does not do software lighting + return doMouthLighting ? 
LIGHTING_MOUTH : LIGHTING_HARDWARE; + } + + bool doSoftwareLighting = doMouthLighting || + (pMaterial->IsVertexLit() && pMaterial->NeedsSoftwareLighting() ); + + if ( !m_pRC->m_Config.m_bSupportsVertexAndPixelShaders ) + { + if ( !doSoftwareLighting && pColorMeshes ) + { + pMaterial->SetUseFixedFunctionBakedLighting( true ); + } + else + { + doSoftwareLighting = true; + pMaterial->SetUseFixedFunctionBakedLighting( false ); + } + } + + StudioModelLighting_t lighting = LIGHTING_HARDWARE; + if ( doMouthLighting ) + lighting = LIGHTING_MOUTH; + else if ( doSoftwareLighting ) + lighting = LIGHTING_SOFTWARE; + + return lighting; +} + + +IMaterial* CStudioRender::R_StudioSetupSkinAndLighting( IMatRenderContext *pRenderContext, int index, IMaterial **ppMaterials, int materialFlags, + void /*IClientRenderable*/ *pClientRenderable, ColorMeshInfo_t *pColorMeshes, StudioModelLighting_t &lighting ) +{ + VPROF( "R_StudioSetupSkin" ); + IMaterial *pMaterial = NULL; + bool bCheckForConVarDrawTranslucentSubModels = false; + if( m_pRC->m_Config.bWireframe && !m_pRC->m_pForcedMaterial ) + { + if ( m_pRC->m_Config.bDrawZBufferedWireframe ) + pMaterial = m_pMaterialMRMWireframeZBuffer; + else + pMaterial = m_pMaterialMRMWireframe; + } + else if( m_pRC->m_Config.bShowEnvCubemapOnly ) + { + pMaterial = m_pMaterialModelEnvCubemap; + } + else + { + if ( !m_pRC->m_pForcedMaterial && ( m_pRC->m_nForcedMaterialType != OVERRIDE_DEPTH_WRITE && m_pRC->m_nForcedMaterialType != OVERRIDE_SSAO_DEPTH_WRITE ) ) + { + pMaterial = ppMaterials[index]; + if ( !pMaterial ) + { + Assert( 0 ); + return 0; + } + } + else + { + materialFlags = 0; + pMaterial = m_pRC->m_pForcedMaterial; + if (m_pRC->m_nForcedMaterialType == OVERRIDE_BUILD_SHADOWS) + { + // Connect the original material up to the shadow building material + // Also bind the original material so its proxies are in the correct state + static unsigned int translucentCache = 0; + IMaterialVar* pOriginalMaterialVar = pMaterial->FindVarFast( 
"$translucent_material", &translucentCache ); + Assert( pOriginalMaterialVar ); + IMaterial *pOriginalMaterial = ppMaterials[index]; + if ( pOriginalMaterial ) + { + // Disable any alpha modulation on the original material that was left over from when it was last rendered + pOriginalMaterial->AlphaModulate( 1.0f ); + pRenderContext->Bind( pOriginalMaterial, pClientRenderable ); + if ( pOriginalMaterial->IsTranslucent() || pOriginalMaterial->IsAlphaTested() ) + { + if ( pOriginalMaterialVar ) + pOriginalMaterialVar->SetMaterialValue( pOriginalMaterial ); + } + else + { + if ( pOriginalMaterialVar ) + pOriginalMaterialVar->SetMaterialValue( NULL ); + } + } + else + { + if ( pOriginalMaterialVar ) + pOriginalMaterialVar->SetMaterialValue( NULL ); + } + } + else if ( m_pRC->m_nForcedMaterialType == OVERRIDE_DEPTH_WRITE || m_pRC->m_nForcedMaterialType == OVERRIDE_SSAO_DEPTH_WRITE ) + { + // Disable any alpha modulation on the original material that was left over from when it was last rendered + ppMaterials[index]->AlphaModulate( 1.0f ); + + // Bail if the material is still considered translucent after setting the AlphaModulate to 1.0 + if ( ppMaterials[index]->IsTranslucent() ) + { + return NULL; + } + + static unsigned int originalTextureVarCache = 0; + IMaterialVar *pOriginalTextureVar = ppMaterials[index]->FindVarFast( "$basetexture", &originalTextureVarCache ); + + // Select proper override material + int nAlphaTest = (int) ( ppMaterials[index]->IsAlphaTested() && pOriginalTextureVar->IsTexture() ); // alpha tested base texture + int nNoCull = (int) ppMaterials[index]->IsTwoSided(); + if ( m_pRC->m_nForcedMaterialType == OVERRIDE_SSAO_DEPTH_WRITE ) + { + pMaterial = m_pSSAODepthWrite[nAlphaTest][nNoCull]; + } + else + { + pMaterial = m_pDepthWrite[nAlphaTest][nNoCull]; + } + + // If we're alpha tested, we should set up the texture variables from the original material + if ( nAlphaTest != 0 ) + { + static unsigned int originalTextureFrameVarCache = 0; + IMaterialVar 
*pOriginalTextureFrameVar = ppMaterials[index]->FindVarFast( "$frame", &originalTextureFrameVarCache ); + static unsigned int originalAlphaRefCache = 0; + IMaterialVar *pOriginalAlphaRefVar = ppMaterials[index]->FindVarFast( "$AlphaTestReference", &originalAlphaRefCache ); + + static unsigned int textureVarCache = 0; + IMaterialVar *pTextureVar = pMaterial->FindVarFast( "$basetexture", &textureVarCache ); + static unsigned int textureFrameVarCache = 0; + IMaterialVar *pTextureFrameVar = pMaterial->FindVarFast( "$frame", &textureFrameVarCache ); + static unsigned int alphaRefCache = 0; + IMaterialVar *pAlphaRefVar = pMaterial->FindVarFast( "$AlphaTestReference", &alphaRefCache ); + + if ( pOriginalTextureVar->IsTexture() ) // If $basetexture is defined + { + if( pTextureVar && pOriginalTextureVar ) + { + pTextureVar->SetTextureValue( pOriginalTextureVar->GetTextureValue() ); + } + + if( pTextureFrameVar && pOriginalTextureFrameVar ) + { + pTextureFrameVar->SetIntValue( pOriginalTextureFrameVar->GetIntValue() ); + } + + if( pAlphaRefVar && pOriginalAlphaRefVar ) + { + pAlphaRefVar->SetFloatValue( pOriginalAlphaRefVar->GetFloatValue() ); + } + } + } + } + } + + // Set this bool to check after the bind below + bCheckForConVarDrawTranslucentSubModels = true; + + if ( m_pRC->m_nForcedMaterialType != OVERRIDE_DEPTH_WRITE && m_pRC->m_nForcedMaterialType != OVERRIDE_SSAO_DEPTH_WRITE) + { + // Try to set the alpha based on the blend + pMaterial->AlphaModulate( m_pRC->m_AlphaMod ); + + // Try to set the color based on the colormod + pMaterial->ColorModulate( m_pRC->m_ColorMod[0], m_pRC->m_ColorMod[1], m_pRC->m_ColorMod[2] ); + } + } + + lighting = R_StudioComputeLighting( pMaterial, materialFlags, pColorMeshes ); + if ( lighting == LIGHTING_MOUTH ) + { + if ( !m_pRC->m_Config.bTeeth || !R_TeethAreVisible() ) + return NULL; + // skin it and light it, but only if we need to. 
+ if ( m_pRC->m_Config.m_bSupportsVertexAndPixelShaders ) + { + R_MouthSetupVertexShader( pMaterial ); + } + } + + // TODO: It's possible we don't want to use the color texels--for example because of a convar. + // We should check that here in addition to whether or not we have the data available. + static unsigned int lightmapVarCache = 0; + IMaterialVar *pLightmapVar = pMaterial->FindVarFast( "$lightmap", &lightmapVarCache ); + if ( pLightmapVar ) + { + ITexture* newTex = pColorMeshes ? pColorMeshes->m_pLightmap : NULL; + + if (newTex) + pLightmapVar->SetTextureValue(newTex); + else + pLightmapVar->SetUndefined(); + } + + pRenderContext->Bind( pMaterial, pClientRenderable ); + + if ( bCheckForConVarDrawTranslucentSubModels ) + { + bool translucent = pMaterial->IsTranslucent(); + + if (( m_bDrawTranslucentSubModels && !translucent ) || + ( !m_bDrawTranslucentSubModels && translucent )) + { + m_bSkippedMeshes = true; + return NULL; + } + } + + return pMaterial; +} + + + +//============================================================================= + + +/* +================= +R_StudioSetupModel + based on the body part, figure out which mesh it should be using. 
+inputs: +outputs: + pstudiomesh + pmdl +================= +*/ +int R_StudioSetupModel( int bodypart, int entity_body, mstudiomodel_t **ppSubModel, + const studiohdr_t *pStudioHdr ) +{ + int index; + mstudiobodyparts_t *pbodypart; + + if (bodypart > pStudioHdr->numbodyparts) + { + ConDMsg ("R_StudioSetupModel: no such bodypart %d\n", bodypart); + bodypart = 0; + } + + pbodypart = pStudioHdr->pBodypart( bodypart ); + + if ( pbodypart->base == 0 ) + { + Warning( "Model has missing body part: %s\n", pStudioHdr->pszName() ); + Assert( 0 ); + } + index = entity_body / pbodypart->base; + index = index % pbodypart->nummodels; + + Assert( ppSubModel ); + *ppSubModel = pbodypart->pModel( index ); + return index; +} + + + +//----------------------------------------------------------------------------- +// Generates the PoseToBone Matrix nessecary to align the given bone with the +// world. +//----------------------------------------------------------------------------- +static void ScreenAlignBone( matrix3x4_t *pPoseToWorld, mstudiobone_t *pCurBone, + const Vector& vecViewOrigin, const matrix3x4_t &boneToWorld ) +{ + // Grab the world translation: + Vector vT( boneToWorld[0][3], boneToWorld[1][3], boneToWorld[2][3] ); + + // Construct the coordinate frame: + // Initialized to get rid of compiler + Vector vX, vY, vZ; + + if( pCurBone->flags & BONE_SCREEN_ALIGN_SPHERE ) + { + vX = vecViewOrigin - vT; + VectorNormalize(vX); + vZ = Vector(0,0,1); + vY = vZ.Cross(vX); + VectorNormalize(vY); + vZ = vX.Cross(vY); + VectorNormalize(vZ); + } + else + { + Assert( pCurBone->flags & BONE_SCREEN_ALIGN_CYLINDER ); + vX.Init( boneToWorld[0][0], boneToWorld[1][0], boneToWorld[2][0] ); + vZ = vecViewOrigin - vT; + VectorNormalize(vZ); + vY = vZ.Cross(vX); + VectorNormalize(vY); + vZ = vX.Cross(vY); + VectorNormalize(vZ); + } + + matrix3x4_t matBoneBillboard( + vX.x, vY.x, vZ.x, vT.x, + vX.y, vY.y, vZ.y, vT.y, + vX.z, vY.z, vZ.z, vT.z ); + ConcatTransforms( matBoneBillboard, 
pCurBone->poseToBone, *pPoseToWorld ); +} + + +//----------------------------------------------------------------------------- +// Computes PoseToWorld from BoneToWorld +//----------------------------------------------------------------------------- +void ComputePoseToWorld( matrix3x4_t *pPoseToWorld, studiohdr_t *pStudioHdr, int boneMask, const Vector& vecViewOrigin, const matrix3x4_t *pBoneToWorld ) +{ + if ( pStudioHdr->flags & STUDIOHDR_FLAGS_STATIC_PROP ) + { + // by definition, these always have an identity poseToBone transform + MatrixCopy( pBoneToWorld[ 0 ], pPoseToWorld[ 0 ] ); + return; + } + + if ( !pStudioHdr->pLinearBones() ) + { + // convert bone to world transformations into pose to world transformations + for (int i = 0; i < pStudioHdr->numbones; i++) + { + mstudiobone_t *pCurBone = pStudioHdr->pBone( i ); + if ( !(pCurBone->flags & boneMask) ) + continue; + + ConcatTransforms( pBoneToWorld[ i ], pCurBone->poseToBone, pPoseToWorld[ i ] ); + } + } + else + { + mstudiolinearbone_t *pLinearBones = pStudioHdr->pLinearBones(); + + // convert bone to world transformations into pose to world transformations + for (int i = 0; i < pStudioHdr->numbones; i++) + { + if ( !(pLinearBones->flags(i) & boneMask) ) + continue; + + ConcatTransforms( pBoneToWorld[ i ], pLinearBones->poseToBone(i), pPoseToWorld[ i ] ); + } + } + +#if 0 + // These don't seem to be used in any existing QC file, re-enable in a future project? 
+ // Pretransform + if( !( pCurBone->flags & ( BONE_SCREEN_ALIGN_SPHERE | BONE_SCREEN_ALIGN_CYLINDER ))) + { + ConcatTransforms( pBoneToWorld[ i ], pCurBone->poseToBone, pPoseToWorld[ i ] ); + } + else + { + // If this bone is screen aligned, then generate a PoseToWorld matrix that billboards the bone + ScreenAlignBone( &pPoseToWorld[i], pCurBone, vecViewOrigin, pBoneToWorld[i] ); + } +#endif +} + + diff --git a/studiorender/r_studiodecal.cpp b/studiorender/r_studiodecal.cpp new file mode 100644 index 0000000..52ff350 --- /dev/null +++ b/studiorender/r_studiodecal.cpp @@ -0,0 +1,1990 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//===========================================================================// + +#include "studiorender.h" +#include "studiorendercontext.h" +#include "materialsystem/imaterialsystem.h" +#include "materialsystem/imaterialsystemhardwareconfig.h" +#include "materialsystem/imesh.h" +#include "materialsystem/imaterial.h" +#include "mathlib/mathlib.h" +#include "optimize.h" +#include "cmodel.h" +#include "materialsystem/imaterialvar.h" +#include "convar.h" + +#include "tier0/vprof.h" +#include "tier0/minidump.h" + +// memdbgon must be the last include file in a .cpp file!!! 
#include "tier0/memdbgon.h"


// Running total of decal vertices currently allocated across all decal
// materials; incremented when a decal vertex is added and decremented when
// decal geometry is freed.
static int g_nTotalDecalVerts;

//-----------------------------------------------------------------------------
// Decal triangle clip flags
// One bit per edge of the unit decal square in (u,v) space; a bit is set when
// a vertex lies outside that edge.
//-----------------------------------------------------------------------------
enum
{
	DECAL_CLIP_MINUSU	= 0x1,	// u < 0
	DECAL_CLIP_MINUSV	= 0x2,	// v < 0
	DECAL_CLIP_PLUSU	= 0x4,	// u > 1
	DECAL_CLIP_PLUSV	= 0x8,	// v > 1
};


#define MAX_DECAL_INDICES_PER_MODEL 2048


//-----------------------------------------------------------------------------
// Triangle clipping state
// Scratch data used while clipping one triangle against the four edges of the
// decal square.
//-----------------------------------------------------------------------------
struct DecalClipState_t
{
	// Number of used vertices
	int m_VertCount;

	// Indices into the clip verts array of the used vertices
	// NOTE(review): 7 presumably is the 3 triangle verts plus at most one
	// extra vert per clip plane (4 planes) -- confirm against the clipper.
	int m_Indices[2][7];

	// Helps us avoid copying the m_Indices array by using double-buffering
	// (selects which row of m_Indices holds the current polygon)
	bool m_Pass;

	// Add vertices we've started with and had to generate due to clipping
	int m_ClipVertCount;
	DecalVertex_t	m_ClipVerts[16];

	// Union of the decal triangle clip flags above for each vert
	int m_ClipFlags[16];

	// Intentionally leaves every member uninitialized; the user fills them in
	DecalClipState_t() {}

private:
	// Copy constructors are not allowed
	DecalClipState_t( const DecalClipState_t& src );
};


//-----------------------------------------------------------------------------
//
// Lovely decal code begins here... ABANDON ALL HOPE YE WHO ENTER!!!
+// +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// Functions to make vertex opaque +//----------------------------------------------------------------------------- + +#ifdef COMPACT_DECAL_VERT +#define GetVecTexCoord( v ) (v.operator Vector2D()) +#define GetVecNormal( v ) (v.operator Vector()) +#else +#define GetVecTexCoord( v ) v +#define GetVecNormal( v ) v +#endif + + +//----------------------------------------------------------------------------- +// Remove decal from LRU +//----------------------------------------------------------------------------- +void CStudioRender::RemoveDecalListFromLRU( StudioDecalHandle_t h ) +{ + DecalLRUListIndex_t i, next; + for ( i = m_DecalLRU.Head(); i != m_DecalLRU.InvalidIndex(); i = next ) + { + next = m_DecalLRU.Next(i); + if ( m_DecalLRU[i].m_hDecalHandle == h ) + { + m_DecalLRU.Remove( i ); + } + } +} + + +//----------------------------------------------------------------------------- +// Create, destroy list of decals for a particular model +//----------------------------------------------------------------------------- +StudioDecalHandle_t CStudioRender::CreateDecalList( studiohwdata_t *pHardwareData ) +{ + if ( !pHardwareData || pHardwareData->m_NumLODs <= 0 ) + return STUDIORENDER_DECAL_INVALID; + + // NOTE: This function is called directly without queueing + m_DecalMutex.Lock(); + int handle = m_DecalList.AddToTail(); + m_DecalMutex.Unlock(); + + m_DecalList[handle].m_pHardwareData = pHardwareData; + m_DecalList[handle].m_pLod = new DecalLod_t[pHardwareData->m_NumLODs]; + m_DecalList[handle].m_nLods = pHardwareData->m_NumLODs; + + for (int i = 0; i < pHardwareData->m_NumLODs; i++) + { + m_DecalList[handle].m_pLod[i].m_FirstMaterial = m_DecalMaterial.InvalidIndex(); + } + + return (StudioDecalHandle_t)handle; +} + +void CStudioRender::DestroyDecalList( StudioDecalHandle_t hDecal ) +{ + if ( hDecal == 
STUDIORENDER_DECAL_INVALID ) + return; + + RemoveDecalListFromLRU( hDecal ); + + int h = (int)hDecal; + // Clean up + for (int i = 0; i < m_DecalList[h].m_nLods; i++ ) + { + // Blat out all geometry associated with all materials + unsigned short mat = m_DecalList[h].m_pLod[i].m_FirstMaterial; + unsigned short next; + while (mat != m_DecalMaterial.InvalidIndex()) + { + next = m_DecalMaterial.Next(mat); + + g_nTotalDecalVerts -= m_DecalMaterial[mat].m_Vertices.Count(); + + m_DecalMaterial.Free(mat); + + mat = next; + } + } + + delete[] m_DecalList[h].m_pLod; + m_DecalList[h].m_pLod = NULL; + + m_DecalMutex.Lock(); + m_DecalList.Remove( h ); + m_DecalMutex.Unlock(); +} + + +//----------------------------------------------------------------------------- +// Transformation/Rotation for decals +//----------------------------------------------------------------------------- +#define FRONTFACING_EPS 0.1f + +inline bool CStudioRender::IsFrontFacing( const Vector * pnorm, const mstudioboneweight_t * pboneweight ) +{ + // NOTE: This only works to rotate normals if there's no scale in the + // pose to world transforms. If we ever add scale, we'll need to + // multiply by the inverse transpose of the pose to decal + + float z; + if (pboneweight->numbones == 1) + { + z = DotProduct( pnorm->Base(), m_PoseToDecal[(unsigned)pboneweight->bone[0]][2] ); + } + else + { + float zbone; + + z = 0; + for (int i = 0; i < pboneweight->numbones; i++) + { + zbone = DotProduct( pnorm->Base(), m_PoseToDecal[(unsigned)pboneweight->bone[i]][2] ); + z += zbone * pboneweight->weight[i]; + } + } + + return ( z >= FRONTFACING_EPS ); +} + +inline bool CStudioRender::TransformToDecalSpace( DecalBuildInfo_t& build, const Vector& pos, + mstudioboneweight_t *pboneweight, Vector2D& uv ) +{ + // NOTE: This only works to rotate normals if there's no scale in the + // pose to world transforms. 
If we ever add scale, we'll need to + // multiply by the inverse transpose of the pose to world + + if (pboneweight->numbones == 1) + { + uv.x = DotProduct( pos.Base(), m_PoseToDecal[(unsigned)pboneweight->bone[0]][0] ) + + m_PoseToDecal[(unsigned)pboneweight->bone[0]][0][3]; + uv.y = DotProduct( pos.Base(), m_PoseToDecal[(unsigned)pboneweight->bone[0]][1] ) + + m_PoseToDecal[(unsigned)pboneweight->bone[0]][1][3]; + } + else + { + uv.x = uv.y = 0; + float ubone, vbone; + for (int i = 0; i < pboneweight->numbones; i++) + { + ubone = DotProduct( pos.Base(), m_PoseToDecal[(unsigned)pboneweight->bone[i]][0] ) + + m_PoseToDecal[(unsigned)pboneweight->bone[i]][0][3]; + vbone = DotProduct( pos.Base(), m_PoseToDecal[(unsigned)pboneweight->bone[i]][1] ) + + m_PoseToDecal[(unsigned)pboneweight->bone[i]][1][3]; + + uv.x += ubone * pboneweight->weight[i]; + uv.y += vbone * pboneweight->weight[i]; + } + } + + if (!build.m_NoPokeThru) + return true; + + // No poke thru? do culling.... + float z; + if (pboneweight->numbones == 1) + { + z = DotProduct( pos.Base(), m_PoseToDecal[(unsigned)pboneweight->bone[0]][2] ) + + m_PoseToDecal[(unsigned)pboneweight->bone[0]][2][3]; + } + else + { + z = 0; + float zbone; + for (int i = 0; i < pboneweight->numbones; i++) + { + zbone = DotProduct( pos.Base(), m_PoseToDecal[(unsigned)pboneweight->bone[i]][2] ) + + m_PoseToDecal[(unsigned)pboneweight->bone[i]][2][3]; + z += zbone * pboneweight->weight[i]; + } + } + + return (fabs(z) < build.m_Radius ); +} + + +//----------------------------------------------------------------------------- +// Projects a decal onto a mesh +//----------------------------------------------------------------------------- +bool CStudioRender::ProjectDecalOntoMesh( DecalBuildInfo_t& build, DecalBuildVertexInfo_t* pVertexInfo, mstudiomesh_t *pMesh ) +{ + float invRadius = (build.m_Radius != 0.0f) ? 
1.0f / build.m_Radius : 1.0f; + + const mstudio_meshvertexdata_t *vertData = pMesh->GetVertexData( build.m_pStudioHdr ); + const thinModelVertices_t *thinVertData = NULL; + + if ( !vertData ) + { + // For most models (everything that's not got flex data), the vertex data is 'thinned' on load to save memory + thinVertData = pMesh->GetThinVertexData( build.m_pStudioHdr ); + if ( !thinVertData ) + return false; + } + + // For this to work, the plane and intercept must have been transformed + // into pose space. Also, we'll not be bothering with flexes. + for ( int j=0; j < pMesh->numvertices; ++j ) + { + mstudioboneweight_t localBoneWeights; + Vector localPosition; + Vector localNormal; + Vector * vecPosition; + Vector * vecNormal; + mstudioboneweight_t * boneWeights; + + if ( vertData ) + { + mstudiovertex_t &vert = *vertData->Vertex( j ); + vecPosition = &vert.m_vecPosition; + vecNormal = &vert.m_vecNormal; + boneWeights = &vert.m_BoneWeights; + } + else + { + thinVertData->GetMeshPosition( pMesh, j, &localPosition ); + vecPosition = &localPosition; + thinVertData->GetMeshNormal( pMesh, j, &localNormal ); + vecNormal = &localNormal; + thinVertData->GetMeshBoneWeights( pMesh, j, &localBoneWeights ); + boneWeights = &localBoneWeights; + } + + // No decal vertex yet... + pVertexInfo[j].m_VertexIndex = 0xFFFF; + pVertexInfo[j].m_UniqueID = 0xFF; + pVertexInfo[j].m_Flags = 0; + + // We need to know if the normal is pointing in the negative direction + // if so, blow off all triangles connected to that vertex. 
+ if ( !IsFrontFacing( vecNormal, boneWeights ) ) + continue; + + pVertexInfo[j].m_Flags |= DecalBuildVertexInfo_t::FRONT_FACING; + + bool inValidArea = TransformToDecalSpace( build, *vecPosition, boneWeights, pVertexInfo[j].m_UV ); + pVertexInfo[j].m_Flags |= ( inValidArea << 1 ); + + pVertexInfo[j].m_UV *= invRadius * 0.5f; + pVertexInfo[j].m_UV[0] += 0.5f; + pVertexInfo[j].m_UV[1] += 0.5f; + } + return true; +} + + +//----------------------------------------------------------------------------- +// Computes clip flags +//----------------------------------------------------------------------------- +inline int ComputeClipFlags( Vector2D const& uv ) +{ + // Otherwise we gotta do the test + int flags = 0; + + if (uv.x < 0.0f) + flags |= DECAL_CLIP_MINUSU; + else if (uv.x > 1.0f) + flags |= DECAL_CLIP_PLUSU; + + if (uv.y < 0.0f) + flags |= DECAL_CLIP_MINUSV; + else if (uv.y > 1.0f ) + flags |= DECAL_CLIP_PLUSV; + + return flags; +} + +inline int CStudioRender::ComputeClipFlags( DecalBuildVertexInfo_t* pVertexInfo, int i ) +{ + return ::ComputeClipFlags( pVertexInfo[i].m_UV ); +} + + +//----------------------------------------------------------------------------- +// Creates a new vertex where the edge intersects the plane +//----------------------------------------------------------------------------- +static int IntersectPlane( DecalClipState_t& state, int start, int end, + int normalInd, float val ) +{ + DecalVertex_t& startVert = state.m_ClipVerts[start]; + DecalVertex_t& endVert = state.m_ClipVerts[end]; + + Vector2D dir; + Vector2DSubtract( endVert.m_TexCoord, startVert.m_TexCoord, dir ); + Assert( dir[normalInd] != 0.0f ); + float t = (val - GetVecTexCoord( startVert.m_TexCoord )[normalInd]) / dir[normalInd]; + + // Allocate a clipped vertex + DecalVertex_t& out = state.m_ClipVerts[state.m_ClipVertCount]; + int newVert = state.m_ClipVertCount++; + + // The clipped vertex has no analogue in the original mesh + out.m_MeshVertexIndex = 0xFFFF; + out.m_Mesh = 
0xFFFF; + out.m_Model = ( sizeof(out.m_Model) == 1 ) ? 0xFF : 0xFFFF; + out.m_Body = ( sizeof(out.m_Body) == 1 ) ? 0xFF : 0xFFFF; + + // Interpolate position + out.m_Position[0] = startVert.m_Position[0] * (1.0 - t) + endVert.m_Position[0] * t; + out.m_Position[1] = startVert.m_Position[1] * (1.0 - t) + endVert.m_Position[1] * t; + out.m_Position[2] = startVert.m_Position[2] * (1.0 - t) + endVert.m_Position[2] * t; + + // Interpolate normal + Vector vNormal; + // FIXME: this is a bug (it's using position data to compute interpolated normals!)... not seeing any obvious artifacts, though + vNormal[0] = startVert.m_Position[0] * (1.0 - t) + endVert.m_Position[0] * t; + vNormal[1] = startVert.m_Position[1] * (1.0 - t) + endVert.m_Position[1] * t; + vNormal[2] = startVert.m_Position[2] * (1.0 - t) + endVert.m_Position[2] * t; + VectorNormalize( vNormal ); + out.m_Normal = vNormal; + + // Interpolate texture coord + Vector2D vTexCoord; + Vector2DLerp( GetVecTexCoord( startVert.m_TexCoord ), GetVecTexCoord( endVert.m_TexCoord ), t, vTexCoord ); + out.m_TexCoord = vTexCoord; + + // Compute the clip flags baby... 
+ state.m_ClipFlags[newVert] = ComputeClipFlags( out.m_TexCoord ); + + return newVert; +} + +//----------------------------------------------------------------------------- +// Clips a triangle against a plane, use clip flags to speed it up +//----------------------------------------------------------------------------- + +static void ClipTriangleAgainstPlane( DecalClipState_t& state, int normalInd, int flag, float val ) +{ + // FIXME: Could compute the & of all the clip flags of all the verts + // as we go through the loop to do another early out + + // Ye Olde Sutherland-Hodgman clipping algorithm + int outVertCount = 0; + int start = state.m_Indices[state.m_Pass][state.m_VertCount - 1]; + bool startInside = (state.m_ClipFlags[start] & flag) == 0; + for (int i = 0; i < state.m_VertCount; ++i) + { + int end = state.m_Indices[state.m_Pass][i]; + + bool endInside = (state.m_ClipFlags[end] & flag) == 0; + if (endInside) + { + if (!startInside) + { + int clipVert = IntersectPlane( state, start, end, normalInd, val ); + state.m_Indices[!state.m_Pass][outVertCount++] = clipVert; + } + state.m_Indices[!state.m_Pass][outVertCount++] = end; + } + else + { + if (startInside) + { + int clipVert = IntersectPlane( state, start, end, normalInd, val ); + state.m_Indices[!state.m_Pass][outVertCount++] = clipVert; + } + } + start = end; + startInside = endInside; + } + + state.m_Pass = !state.m_Pass; + state.m_VertCount = outVertCount; +} + + +//----------------------------------------------------------------------------- +// Converts a mesh index to a DecalVertex_t +//----------------------------------------------------------------------------- +void CStudioRender::ConvertMeshVertexToDecalVertex( DecalBuildInfo_t& build, + int meshIndex, DecalVertex_t& decalVertex, int nGroupIndex ) +{ + // Copy over the data; + // get the texture coords from the decal planar projection + + Assert( meshIndex < MAXSTUDIOVERTS ); + + if ( build.m_pMeshVertexData ) + { + VectorCopy( 
*build.m_pMeshVertexData->Position( meshIndex ), decalVertex.m_Position ); + VectorCopy( *build.m_pMeshVertexData->Normal( meshIndex ), GetVecNormal( decalVertex.m_Normal ) ); + } + else + { + // At this point in the code, we should definitely have either compressed or uncompressed vertex data + Assert( build.m_pMeshThinVertexData ); + Vector position; + Vector normal; + build.m_pMeshThinVertexData->GetMeshPosition( build.m_pMesh, meshIndex, &position ); + build.m_pMeshThinVertexData->GetMeshNormal( build.m_pMesh, meshIndex, &normal ); + VectorCopy( position, decalVertex.m_Position ); + VectorCopy( normal, GetVecNormal( decalVertex.m_Normal ) ); + } + Vector2DCopy( build.m_pVertexInfo[meshIndex].m_UV, GetVecTexCoord( decalVertex.m_TexCoord ) ); + decalVertex.m_MeshVertexIndex = meshIndex; + decalVertex.m_Mesh = build.m_Mesh; + Assert( decalVertex.m_Mesh < 100 ); + decalVertex.m_Model = build.m_Model; + decalVertex.m_Body = build.m_Body; + decalVertex.m_Group = build.m_Group; + decalVertex.m_GroupIndex = nGroupIndex; +} + + +//----------------------------------------------------------------------------- +// Adds a vertex to the list of vertices for this material +//----------------------------------------------------------------------------- +inline unsigned short CStudioRender::AddVertexToDecal( DecalBuildInfo_t& build, int nMeshIndex, int nGroupIndex ) +{ + DecalBuildVertexInfo_t* pVertexInfo = build.m_pVertexInfo; + + // If we've never seen this vertex before, we need to add a new decal vert + if ( pVertexInfo[nMeshIndex].m_UniqueID != build.m_nGlobalMeshIndex ) + { + pVertexInfo[nMeshIndex].m_UniqueID = build.m_nGlobalMeshIndex; + DecalVertexList_t& decalVertexList = build.m_pDecalMaterial->m_Vertices; + + DecalVertexList_t::IndexType_t v; + v = decalVertexList.AddToTail(); + g_nTotalDecalVerts++; + + // Copy over the data; + ConvertMeshVertexToDecalVertex( build, nMeshIndex, build.m_pDecalMaterial->m_Vertices[v], nGroupIndex ); + +#ifdef _DEBUG + // Make sure 
clipped vertices are in the right range... + if (build.m_UseClipVert) + { + Assert( (decalVertexList[v].m_TexCoord[0] >= -1e-3) && (decalVertexList[v].m_TexCoord[0] - 1.0f < 1e-3) ); + Assert( (decalVertexList[v].m_TexCoord[1] >= -1e-3) && (decalVertexList[v].m_TexCoord[1] - 1.0f < 1e-3) ); + } +#endif + + // Store off the index of this vertex so we can reference it again + pVertexInfo[nMeshIndex].m_VertexIndex = build.m_VertexCount; + ++build.m_VertexCount; + if (build.m_FirstVertex == decalVertexList.InvalidIndex()) + { + build.m_FirstVertex = v; + } + } + + return pVertexInfo[nMeshIndex].m_VertexIndex; +} + + +//----------------------------------------------------------------------------- +// Adds a vertex to the list of vertices for this material +//----------------------------------------------------------------------------- +inline unsigned short CStudioRender::AddVertexToDecal( DecalBuildInfo_t& build, DecalVertex_t& vert ) +{ + // This creates a unique vertex + DecalVertexList_t& decalVertexList = build.m_pDecalMaterial->m_Vertices; + + // Try to see if the clipped vertex already exists in our decal list... 
+ // Only search for matches with verts appearing in the current decal + DecalVertexList_t::IndexType_t i; + unsigned short vertexCount = 0; + for ( i = build.m_FirstVertex; i != decalVertexList.InvalidIndex(); + i = decalVertexList.Next(i), ++vertexCount ) + { + // Only bother to check against clipped vertices + if ( decalVertexList[i].GetMesh( build.m_pStudioHdr ) ) + continue; + + // They must have the same position, and normal + // texcoord will fall right out if the positions match + Vector temp; + VectorSubtract( decalVertexList[i].m_Position, vert.m_Position, temp ); + if ( (fabs(temp[0]) > 1e-3) || (fabs(temp[1]) > 1e-3) || (fabs(temp[2]) > 1e-3) ) + continue; + + VectorSubtract( decalVertexList[i].m_Normal, vert.m_Normal, temp ); + if ( (fabs(temp[0]) > 1e-3) || (fabs(temp[1]) > 1e-3) || (fabs(temp[2]) > 1e-3) ) + continue; + + return vertexCount; + } + + // This path is the path taken by clipped vertices + Assert( (vert.m_TexCoord[0] >= -1e-3) && (vert.m_TexCoord[0] - 1.0f < 1e-3) ); + Assert( (vert.m_TexCoord[1] >= -1e-3) && (vert.m_TexCoord[1] - 1.0f < 1e-3) ); + + // Must create a new vertex... + DecalVertexList_t::IndexType_t idx = decalVertexList.AddToTail(vert); + g_nTotalDecalVerts++; + if (build.m_FirstVertex == decalVertexList.InvalidIndex()) + build.m_FirstVertex = idx; + Assert( vertexCount == build.m_VertexCount ); + return build.m_VertexCount++; +} + + +//----------------------------------------------------------------------------- +// Adds the clipped triangle to the decal +//----------------------------------------------------------------------------- +void CStudioRender::AddClippedDecalToTriangle( DecalBuildInfo_t& build, DecalClipState_t& clipState ) +{ + // FIXME: Clipped vertices will almost always be shared. We + // need a way of associating clipped vertices with edges so we can share + // the clipped vertices quickly + Assert( clipState.m_VertCount <= 7 ); + + // Yeah baby yeah!! 
Add this sucka + int i; + unsigned short indices[7]; + for ( i = 0; i < clipState.m_VertCount; ++i) + { + // First add the vertices + int vertIdx = clipState.m_Indices[clipState.m_Pass][i]; + if (vertIdx < 3) + { + indices[i] = AddVertexToDecal( build, clipState.m_ClipVerts[vertIdx].m_MeshVertexIndex ); + } + else + { + indices[i] = AddVertexToDecal( build, clipState.m_ClipVerts[vertIdx] ); + } + } + + // Add a trifan worth of triangles + for ( i = 1; i < clipState.m_VertCount - 1; ++i) + { + MEM_ALLOC_CREDIT(); + build.m_pDecalMaterial->m_Indices.AddToTail( indices[0] ); + build.m_pDecalMaterial->m_Indices.AddToTail( indices[i] ); + build.m_pDecalMaterial->m_Indices.AddToTail( indices[i+1] ); + } +} + + +//----------------------------------------------------------------------------- +// Clips the triangle to +/- radius +//----------------------------------------------------------------------------- +bool CStudioRender::ClipDecal( DecalBuildInfo_t& build, int i1, int i2, int i3, int *pClipFlags ) +{ + int i; + + DecalClipState_t clipState; + clipState.m_VertCount = 3; + ConvertMeshVertexToDecalVertex( build, i1, clipState.m_ClipVerts[0] ); + ConvertMeshVertexToDecalVertex( build, i2, clipState.m_ClipVerts[1] ); + ConvertMeshVertexToDecalVertex( build, i3, clipState.m_ClipVerts[2] ); + clipState.m_ClipVertCount = 3; + + for ( i = 0; i < 3; ++i) + { + clipState.m_ClipFlags[i] = pClipFlags[i]; + clipState.m_Indices[0][i] = i; + } + clipState.m_Pass = 0; + + // Clip against each plane + ClipTriangleAgainstPlane( clipState, 0, DECAL_CLIP_MINUSU, 0.0f ); + if (clipState.m_VertCount < 3) + return false; + + ClipTriangleAgainstPlane( clipState, 0, DECAL_CLIP_PLUSU, 1.0f ); + if (clipState.m_VertCount < 3) + return false; + + ClipTriangleAgainstPlane( clipState, 1, DECAL_CLIP_MINUSV, 0.0f ); + if (clipState.m_VertCount < 3) + return false; + + ClipTriangleAgainstPlane( clipState, 1, DECAL_CLIP_PLUSV, 1.0f ); + if (clipState.m_VertCount < 3) + return false; + + // Only add 
the clipped decal to the triangle if it's one bone + // otherwise just return if it was clipped + if ( build.m_UseClipVert ) + { + AddClippedDecalToTriangle( build, clipState ); + } + + return true; +} + + +//----------------------------------------------------------------------------- +// Adds a decal to a triangle, but only if it should +//----------------------------------------------------------------------------- +void CStudioRender::AddTriangleToDecal( DecalBuildInfo_t& build, int i1, int i2, int i3, int gi1, int gi2, int gi3 ) +{ + DecalBuildVertexInfo_t* pVertexInfo = build.m_pVertexInfo; + + // All must be front-facing for a decal to be added + // FIXME: Could make it work if not all are front-facing, need clipping for that + int nAllFrontFacing = pVertexInfo[i1].m_Flags & pVertexInfo[i2].m_Flags & pVertexInfo[i3].m_Flags; + if ( ( nAllFrontFacing & DecalBuildVertexInfo_t::FRONT_FACING ) == 0 ) + return; + + // This is used to prevent poke through; if the points are too far away + // from the contact point, then don't add the decal + int nAllNotInValidArea = pVertexInfo[i1].m_Flags | pVertexInfo[i2].m_Flags | pVertexInfo[i3].m_Flags; + if ( ( nAllNotInValidArea & DecalBuildVertexInfo_t::VALID_AREA ) == 0 ) + return; + + // Clip to +/- radius + int clipFlags[3]; + + clipFlags[0] = ComputeClipFlags( pVertexInfo, i1 ); + clipFlags[1] = ComputeClipFlags( pVertexInfo, i2 ); + clipFlags[2] = ComputeClipFlags( pVertexInfo, i3 ); + + // Cull... The result is non-zero if they're all outside the same plane + if ( (clipFlags[0] & (clipFlags[1] & clipFlags[2]) ) != 0) + return; + + bool doClip = true; + + // Trivial accept for skinned polys... if even one vert is inside + // the draw region, accept + if ((!build.m_UseClipVert) && ( !clipFlags[0] || !clipFlags[1] || !clipFlags[2] )) + { + doClip = false; + } + + // Trivial accept... no clip flags set means all in + // Don't clip if we have more than one bone... 
we'll need to do skinning + // and we can't clip the bone indices + // We *do* want to clip in the one bone case though; useful for large + // static props. + if ( doClip && ( clipFlags[0] || clipFlags[1] || clipFlags[2] )) + { + bool validTri = ClipDecal( build, i1, i2, i3, clipFlags ); + + // Don't add the triangle if we culled the triangle or if + // we had one or less bones + if (build.m_UseClipVert || (!validTri)) + return; + } + + // Add the vertices to the decal since there was no clipping + i1 = AddVertexToDecal( build, i1, gi1 ); + i2 = AddVertexToDecal( build, i2, gi2 ); + i3 = AddVertexToDecal( build, i3, gi3 ); + + MEM_ALLOC_CREDIT(); + build.m_pDecalMaterial->m_Indices.AddToTail(i1); + build.m_pDecalMaterial->m_Indices.AddToTail(i2); + build.m_pDecalMaterial->m_Indices.AddToTail(i3); +} + + +//----------------------------------------------------------------------------- +// Adds a decal to a mesh +//----------------------------------------------------------------------------- +void CStudioRender::AddDecalToMesh( DecalBuildInfo_t& build ) +{ + MeshVertexInfo_t &vertexInfo = build.m_pMeshVertices[ build.m_nGlobalMeshIndex ]; + if ( vertexInfo.m_nIndex < 0 ) + return; + + build.m_pVertexInfo = &build.m_pVertexBuffer[ vertexInfo.m_nIndex ]; + + // Draw all the various mesh groups... 
+ for ( int j = 0; j < build.m_pMeshData->m_NumGroup; ++j ) + { + build.m_Group = j; + studiomeshgroup_t* pGroup = &build.m_pMeshData->m_pMeshGroup[j]; + + // Must add decal to each strip in the strip group + // We do this so we can re-use all of the bone state change + // info associated with the strips + for (int k = 0; k < pGroup->m_NumStrips; ++k) + { + OptimizedModel::StripHeader_t* pStrip = &pGroup->m_pStripData[k]; + if (pStrip->flags & OptimizedModel::STRIP_IS_TRISTRIP) + { + for (int i = 0; i < pStrip->numIndices - 2; ++i) + { + bool ccw = (i & 0x1) == 0; + int ti1 = pStrip->indexOffset + i; + int ti2 = ti1+1+ccw; + int ti3 = ti1+2-ccw; + int i1 = pGroup->MeshIndex(ti1); + int i2 = pGroup->MeshIndex(ti2); + int i3 = pGroup->MeshIndex(ti3); + + AddTriangleToDecal( build, i1, i2, i3, pGroup->m_pIndices[ti1], pGroup->m_pIndices[ti2], pGroup->m_pIndices[ti3] ); + } + } + else + { + Assert( pStrip->flags & OptimizedModel::STRIP_IS_TRILIST ); + for (int i = 0; i < pStrip->numIndices; i += 3) + { + int idx = pStrip->indexOffset + i; + + int i1 = pGroup->MeshIndex(idx); + int i2 = pGroup->MeshIndex(idx+1); + int i3 = pGroup->MeshIndex(idx+2); + + AddTriangleToDecal( build, i1, i2, i3, pGroup->m_pIndices[idx], pGroup->m_pIndices[idx+1], pGroup->m_pIndices[idx+2] ); + } + } + } + } +} + +//----------------------------------------------------------------------------- +// Adds a decal to a mesh +//----------------------------------------------------------------------------- +bool CStudioRender::AddDecalToModel( DecalBuildInfo_t& buildInfo ) +{ + // FIXME: We need to do some high-level culling to figure out exactly + // which meshes we need to add the decals to + // Turns out this solution may also be good for mesh sorting + // we need to know the center of each mesh, could also store a + // bounding radius for each mesh and test the ray against each sphere. 
+ + for ( int i = 0; i < m_pSubModel->nummeshes; ++i) + { + buildInfo.m_Mesh = i; + buildInfo.m_pMesh = m_pSubModel->pMesh(i); + buildInfo.m_pMeshData = &m_pStudioMeshes[buildInfo.m_pMesh->meshid]; + Assert(buildInfo.m_pMeshData); + // Grab either fat or thin vertex data + buildInfo.m_pMeshVertexData = buildInfo.m_pMesh->GetVertexData( buildInfo.m_pStudioHdr ); + if ( buildInfo.m_pMeshVertexData == NULL ) + { + buildInfo.m_pMeshThinVertexData = buildInfo.m_pMesh->GetThinVertexData( buildInfo.m_pStudioHdr ); + if ( !buildInfo.m_pMeshThinVertexData ) + return false; + } + + AddDecalToMesh( buildInfo ); + ++buildInfo.m_nGlobalMeshIndex; + } + return true; +} + + +//----------------------------------------------------------------------------- +// Computes the pose to decal plane transform +//----------------------------------------------------------------------------- +bool CStudioRender::ComputePoseToDecal( const Ray_t& ray, const Vector& up ) +{ + // Create a transform that projects world coordinates into a + // basis for the decal + matrix3x4_t worldToDecal; + Vector decalU, decalV, decalN; + + // Get the z axis + VectorMultiply( ray.m_Delta, -1.0f, decalN ); + if (VectorNormalize( decalN ) == 0.0f) + return false; + + // Deal with the u axis + CrossProduct( up, decalN, decalU ); + if ( VectorNormalize( decalU ) < 1e-3 ) + { + // if up parallel or antiparallel to ray, deal... + Vector fixup( up.y, up.z, up.x ); + CrossProduct( fixup, decalN, decalU ); + if ( VectorNormalize( decalU ) < 1e-3 ) + return false; + } + + CrossProduct( decalN, decalU, decalV ); + + // Since I want world-to-decal, I gotta take the inverse of the decal + // to world. 
Assuming post-multiplying column vectors, the decal to world = + // [ Ux Vx Nx | ray.m_Start[0] ] + // [ Uy Vy Ny | ray.m_Start[1] ] + // [ Uz Vz Nz | ray.m_Start[2] ] + + VectorCopy( decalU.Base(), worldToDecal[0] ); + VectorCopy( decalV.Base(), worldToDecal[1] ); + VectorCopy( decalN.Base(), worldToDecal[2] ); + + worldToDecal[0][3] = -DotProduct( ray.m_Start.Base(), worldToDecal[0] ); + worldToDecal[1][3] = -DotProduct( ray.m_Start.Base(), worldToDecal[1] ); + worldToDecal[2][3] = -DotProduct( ray.m_Start.Base(), worldToDecal[2] ); + + // Compute transforms from pose space to decal plane space + for ( int i = 0; i < m_pStudioHdr->numbones; i++) + { + ConcatTransforms( worldToDecal, m_PoseToWorld[i], m_PoseToDecal[i] ); + } + + return true; +} + + +//----------------------------------------------------------------------------- +// Gets the list of triangles for a particular material and lod +//----------------------------------------------------------------------------- + +int CStudioRender::GetDecalMaterial( DecalLod_t& decalLod, IMaterial* pDecalMaterial ) +{ + // Grab the material for this lod... 
+ unsigned short j; + for ( j = decalLod.m_FirstMaterial; j != m_DecalMaterial.InvalidIndex(); j = m_DecalMaterial.Next(j) ) + { + if (m_DecalMaterial[j].m_pMaterial == pDecalMaterial) + { + return j; + } + } + + // If we got here, this must be the first time we saw this material + j = m_DecalMaterial.Alloc( true ); + + // Link it into the list of data for this lod + if (decalLod.m_FirstMaterial != m_DecalMaterial.InvalidIndex() ) + m_DecalMaterial.LinkBefore( decalLod.m_FirstMaterial, j ); + decalLod.m_FirstMaterial = j; + + m_DecalMaterial[j].m_pMaterial = pDecalMaterial; + + return j; +} + +//----------------------------------------------------------------------------- +// Purpose: +//----------------------------------------------------------------------------- +void CStudioRender::RetireDecal( DecalModelList_t &list, DecalId_t nRetireID, int iLOD, int iMaxLOD ) +{ + // Remove it from the global LRU... + DecalLRUListIndex_t i; + for ( i = m_DecalLRU.Head(); i != m_DecalLRU.InvalidIndex(); i = m_DecalLRU.Next( i ) ) + { + if ( nRetireID == m_DecalLRU[i].m_nDecalId ) + { + m_DecalLRU.Remove( i ); + break; + } + } + Assert( i != m_DecalLRU.InvalidIndex() ); + + // Find the id to retire and retire all the decals with this id across all LODs. + DecalHistoryList_t *pHistoryList = &list.m_pLod[iLOD].m_DecalHistory; + Assert( pHistoryList->Count() ); + if ( !pHistoryList->Count() ) + return; + + DecalHistory_t *pDecalHistory = &pHistoryList->Element( pHistoryList->Head() ); + + // Retire this decal in all lods. 
+ for ( int iLod = ( iMaxLOD - 1 ); iLod >= list.m_pHardwareData->m_RootLOD; --iLod ) + { + pHistoryList = &list.m_pLod[iLod].m_DecalHistory; + if ( !pHistoryList ) + continue; + + unsigned short iList = pHistoryList->Head(); + unsigned short iNext = pHistoryList->InvalidIndex(); + + while ( iList != pHistoryList->InvalidIndex() ) + { + iNext = pHistoryList->Next( iList ); + + pDecalHistory = &pHistoryList->Element( iList ); + if ( !pDecalHistory || pDecalHistory->m_nId != nRetireID ) + { + iList = iNext; + continue; + } + + // Find the decal material for the decal to remove + DecalMaterial_t *pMaterial = &m_DecalMaterial[pDecalHistory->m_Material]; + if ( pMaterial ) + { + // @Note!! Decals must be removed in the reverse order they are added. This code + // assumes that the decal to remove is the oldest one on the model, and therefore + // its vertices start at the head of the list + DecalVertexList_t &vertices = pMaterial->m_Vertices; + Decal_t &decalToRemove = pMaterial->m_Decals[pDecalHistory->m_Decal]; + + // Now clear out the vertices referenced by the indices.... + DecalVertexList_t::IndexType_t next; + DecalVertexList_t::IndexType_t vert = vertices.Head(); + Assert( vertices.Count() >= decalToRemove.m_VertexCount ); + int vertsToRemove = decalToRemove.m_VertexCount; + while ( vertsToRemove > 0 ) + { + // blat out the vertices + next = vertices.Next( vert ); + vertices.Remove( vert ); + vert = next; + g_nTotalDecalVerts--; + + --vertsToRemove; + } + + if ( vertices.Count() == 0 ) + { + vertices.Purge(); + } + + // FIXME: This does a memmove. How expensive is it? 
+ pMaterial->m_Indices.RemoveMultiple( 0, decalToRemove.m_IndexCount ); + if ( pMaterial->m_Indices.Count() == 0) + { + pMaterial->m_Indices.Purge(); + } + + // Remove the decal + pMaterial->m_Decals.Remove( pDecalHistory->m_Decal ); + if ( pMaterial->m_Decals.Count() == 0) + { +#if 1 + pMaterial->m_Decals.Purge(); +#else + if ( list.m_pLod[iLOD].m_FirstMaterial == pDecalHistory->m_Material ) + { + list.m_pLod[iLOD].m_FirstMaterial = m_DecalMaterial.Next( pDecalHistory->m_Material ); + } + m_DecalMaterial.Free( pDecalHistory->m_Material ); +#endif + } + } + + // Clear the decal out of the history + pHistoryList->Remove( iList ); + + // Next element. + iList = iNext; + } + } +} + +//----------------------------------------------------------------------------- +// Adds a decal to the history list +//----------------------------------------------------------------------------- +int CStudioRender::AddDecalToMaterialList( DecalMaterial_t* pMaterial ) +{ + DecalList_t& decalList = pMaterial->m_Decals; + return decalList.AddToTail(); +} + + +//----------------------------------------------------------------------------- +// Total number of meshes we have to deal with +//----------------------------------------------------------------------------- +int CStudioRender::ComputeTotalMeshCount( int iRootLOD, int iMaxLOD, int body ) const +{ + int nMeshCount = 0; + for ( int k=0 ; k < m_pStudioHdr->numbodyparts ; k++) + { + mstudiomodel_t *pSubModel; + R_StudioSetupModel( k, body, &pSubModel, m_pStudioHdr ); + nMeshCount += pSubModel->nummeshes; + } + + nMeshCount *= iMaxLOD-iRootLOD+1; + + return nMeshCount; +} + + +//----------------------------------------------------------------------------- +// Set up the locations for vertices to use +//----------------------------------------------------------------------------- +int CStudioRender::ComputeVertexAllocation( int iMaxLOD, int body, studiohwdata_t *pHardwareData, MeshVertexInfo_t *pMeshVertices ) +{ + bool bSuppressTlucDecal 
= (m_pStudioHdr->flags & STUDIOHDR_FLAGS_TRANSLUCENT_TWOPASS) != 0; + + int nCurrMesh = 0; + int nVertexCount = 0; + for ( int i = iMaxLOD-1; i >= pHardwareData->m_RootLOD; i--) + { + IMaterial **ppMaterials = pHardwareData->m_pLODs[i].ppMaterials; + + for ( int k=0 ; k < m_pStudioHdr->numbodyparts ; k++) + { + mstudiomodel_t *pSubModel; + R_StudioSetupModel( k, body, &pSubModel, m_pStudioHdr ); + + for ( int meshID = 0; meshID < pSubModel->nummeshes; ++meshID, ++nCurrMesh) + { + mstudiomesh_t *pMesh = pSubModel->pMesh(meshID); + + pMeshVertices[nCurrMesh].m_pMesh = pMesh; + + int n; + for ( n = nCurrMesh; --n >= 0; ) + { + if ( pMeshVertices[n].m_pMesh == pMesh ) + { + pMeshVertices[nCurrMesh].m_nIndex = pMeshVertices[n].m_nIndex; + break; + } + } + if ( n >= 0 ) + continue; + + // Don't add to the mesh if the mesh has a translucent material + short *pSkinRef = m_pStudioHdr->pSkinref( 0 ); + IMaterial *pMaterial = ppMaterials[pSkinRef[pMesh->material]]; + if (bSuppressTlucDecal) + { + if (pMaterial->IsTranslucent()) + { + pMeshVertices[nCurrMesh].m_nIndex = -1; + continue; + } + } + + if ( pMaterial->GetMaterialVarFlag( MATERIAL_VAR_SUPPRESS_DECALS ) ) + { + pMeshVertices[nCurrMesh].m_nIndex = -1; + continue; + } + + pMeshVertices[nCurrMesh].m_nIndex = nVertexCount; + nVertexCount += pMesh->numvertices; + } + } + } + + return nVertexCount; +} + + +//----------------------------------------------------------------------------- +// Project decals onto all meshes +//----------------------------------------------------------------------------- +void CStudioRender::ProjectDecalsOntoMeshes( DecalBuildInfo_t& build, int nMeshCount ) +{ + int nMaxVertexIndex = -1; + + for ( int i = 0; i < nMeshCount; ++i ) + { + int nIndex = build.m_pMeshVertices[i].m_nIndex; + + // No mesh, or have we already projected this? 
+ if (( nIndex < 0 ) || ( nIndex <= nMaxVertexIndex )) + continue; + + nMaxVertexIndex = nIndex; + + // Project all vertices for this group into decal space + ProjectDecalOntoMesh( build, &build.m_pVertexBuffer[ nIndex ], build.m_pMeshVertices[i].m_pMesh ); + } +} + + + +//----------------------------------------------------------------------------- +// Add decals to a decal list by doing a planar projection along the ray +//----------------------------------------------------------------------------- +void CStudioRender::AddDecal( StudioDecalHandle_t hDecal, const StudioRenderContext_t& rc, matrix3x4_t *pBoneToWorld, + studiohdr_t *pStudioHdr, const Ray_t& ray, const Vector& decalUp, IMaterial* pDecalMaterial, + float radius, int body, bool noPokethru, int maxLODToDecal ) +{ + VPROF( "CStudioRender::AddDecal" ); + + if ( hDecal == STUDIORENDER_DECAL_INVALID ) + return; + + // For each lod, build the decal list + int h = (int)hDecal; + DecalModelList_t& list = m_DecalList[h]; + + if ( list.m_pHardwareData->m_NumStudioMeshes == 0 ) + return; + + m_pRC = const_cast< StudioRenderContext_t* >( &rc ); + m_pStudioHdr = pStudioHdr; + m_pBoneToWorld = pBoneToWorld; + + // Bone to world must be set before calling AddDecal; it uses that here + // UNDONE: Use current LOD to cull matrices here? 
+ ComputePoseToWorld( m_PoseToWorld, pStudioHdr, BONE_USED_BY_ANYTHING, m_pRC->m_ViewOrigin, m_pBoneToWorld ); + + // Compute transforms from pose space to decal plane space + if (!ComputePoseToDecal( ray, decalUp )) + { + m_pStudioHdr = NULL; + m_pRC = NULL; + m_pBoneToWorld = NULL; + return; + } + + // Get dynamic information from the material (fade start, fade time) + float fadeStartTime = 0.0f; + float fadeDuration = 0.0f; + int flags = 0; + + // This sucker is state needed only when building decals + DecalBuildInfo_t buildInfo; + buildInfo.m_Radius = radius; + buildInfo.m_NoPokeThru = noPokethru; + buildInfo.m_pStudioHdr = pStudioHdr; + buildInfo.m_UseClipVert = ( m_pStudioHdr->numbones <= 1 ) && ( m_pStudioHdr->numflexdesc == 0 ); + buildInfo.m_nGlobalMeshIndex = 0; + buildInfo.m_pMeshVertexData = NULL; + + // Find out which LODs we're defacing + int iMaxLOD; + if ( maxLODToDecal == ADDDECAL_TO_ALL_LODS ) + { + iMaxLOD = list.m_pHardwareData->m_NumLODs; + } + else + { + iMaxLOD = min( list.m_pHardwareData->m_NumLODs, maxLODToDecal ); + } + + // Allocate space for all projected mesh vertices. 
We do this to prevent + // re-projection of the same meshes when they appear in multiple LODs + int nMeshCount = ComputeTotalMeshCount( list.m_pHardwareData->m_RootLOD, iMaxLOD-1, body ); + + // NOTE: This is a consequence of the sizeof (m_UniqueID) + if ( nMeshCount >= 255 ) + { + Warning("Unable to apply decals to model (%s), it has more than 255 unique meshes!\n", m_pStudioHdr->pszName() ); + m_pStudioHdr = NULL; + m_pRC = NULL; + m_pBoneToWorld = NULL; + return; + } + + if ( !IsX360() ) + { + buildInfo.m_pMeshVertices = (MeshVertexInfo_t*)stackalloc( nMeshCount * sizeof(MeshVertexInfo_t) ); + int nVertexCount = ComputeVertexAllocation( iMaxLOD, body, list.m_pHardwareData, buildInfo.m_pMeshVertices ); + buildInfo.m_pVertexBuffer = (DecalBuildVertexInfo_t*)stackalloc( nVertexCount * sizeof(DecalBuildVertexInfo_t) ); + } + else + { + // Don't allocate on the stack + buildInfo.m_pMeshVertices = (MeshVertexInfo_t*)malloc( nMeshCount * sizeof(MeshVertexInfo_t) ); + int nVertexCount = ComputeVertexAllocation( iMaxLOD, body, list.m_pHardwareData, buildInfo.m_pMeshVertices ); + buildInfo.m_pVertexBuffer = (DecalBuildVertexInfo_t*)malloc( nVertexCount * sizeof(DecalBuildVertexInfo_t) ); + } + + // Project all mesh vertices + ProjectDecalsOntoMeshes( buildInfo, nMeshCount ); + + if ( IsX360() ) + { + while ( g_nTotalDecalVerts * sizeof(DecalVertex_t) > 256*1024 && m_DecalLRU.Head() != m_DecalLRU.InvalidIndex() ) + { + DecalId_t nRetireID = m_DecalLRU[ m_DecalLRU.Head() ].m_nDecalId; + StudioDecalHandle_t hRetire = m_DecalLRU[ m_DecalLRU.Head() ].m_hDecalHandle; + DecalModelList_t &modelList = m_DecalList[(int)hRetire]; + RetireDecal( modelList, nRetireID, modelList.m_pHardwareData->m_RootLOD, modelList.m_pHardwareData->m_NumLODs ); + } + } + + // Check to see if we have too many decals on this model + // This assumes that every decal is applied to the root lod at least + int nRootLOD = list.m_pHardwareData->m_RootLOD; + int nFinalLOD = list.m_pHardwareData->m_NumLODs; + 
DecalHistoryList_t *pHistoryList = &list.m_pLod[list.m_pHardwareData->m_RootLOD].m_DecalHistory; + if ( m_DecalLRU.Count() >= m_pRC->m_Config.maxDecalsPerModel * 1.5 ) + { + DecalId_t nRetireID = m_DecalLRU[ m_DecalLRU.Head() ].m_nDecalId; + StudioDecalHandle_t hRetire = m_DecalLRU[ m_DecalLRU.Head() ].m_hDecalHandle; + DecalModelList_t &modelList = m_DecalList[(int)hRetire]; + RetireDecal( modelList, nRetireID, modelList.m_pHardwareData->m_RootLOD, modelList.m_pHardwareData->m_NumLODs ); + } + + if ( pHistoryList->Count() >= m_pRC->m_Config.maxDecalsPerModel ) + { + DecalHistory_t *pDecalHistory = &pHistoryList->Element( pHistoryList->Head() ); + DecalId_t nRetireID = pDecalHistory->m_nId; + StudioDecalHandle_t hRetire = hDecal; + RetireDecal( m_DecalList[(int)hRetire], nRetireID, nRootLOD, nFinalLOD ); + } + + // Search all LODs for an overflow condition and retire those also + for ( int i = iMaxLOD-1; i >= list.m_pHardwareData->m_RootLOD; i-- ) + { + // Grab the list of all decals using the same material for this lod... + int materialIdx = GetDecalMaterial( list.m_pLod[i], pDecalMaterial ); + + // Check to see if we should retire the decal + DecalMaterial_t *pDecalMaterial = &m_DecalMaterial[materialIdx]; + while ( pDecalMaterial->m_Indices.Count() > MAX_DECAL_INDICES_PER_MODEL ) + { + DecalHistoryList_t *pHistoryList = &list.m_pLod[i].m_DecalHistory; + DecalHistory_t *pDecalHistory = &pHistoryList->Element( pHistoryList->Head() ); + RetireDecal( list, pDecalHistory->m_nId, nRootLOD, nFinalLOD ); + } + } + + // Gotta do this for all LODs + bool bAddedDecals = false; + for ( int i = iMaxLOD-1; i >= list.m_pHardwareData->m_RootLOD; i-- ) + { + // Grab the list of all decals using the same material for this lod... 
+ int materialIdx = GetDecalMaterial( list.m_pLod[i], pDecalMaterial ); + buildInfo.m_pDecalMaterial = &m_DecalMaterial[materialIdx]; + + // Grab the meshes for this lod + m_pStudioMeshes = list.m_pHardwareData->m_pLODs[i].m_pMeshData; + + // Don't decal on meshes that are translucent if it's twopass + buildInfo.m_ppMaterials = list.m_pHardwareData->m_pLODs[i].ppMaterials; + + // Set up info needed for vertex sharing + buildInfo.m_FirstVertex = buildInfo.m_pDecalMaterial->m_Vertices.InvalidIndex(); + buildInfo.m_VertexCount = 0; + + int prevIndexCount = buildInfo.m_pDecalMaterial->m_Indices.Count(); + + // Step over all body parts + add decals to em all! + int k; + for ( k=0 ; k < m_pStudioHdr->numbodyparts ; k++) + { + // Grab the model for this body part + int model = R_StudioSetupModel( k, body, &m_pSubModel, m_pStudioHdr ); + buildInfo.m_Body = k; + buildInfo.m_Model = model; + if ( !AddDecalToModel( buildInfo ) ) + break; + } + + if ( k != m_pStudioHdr->numbodyparts ) + continue; + + // Add this to the list of decals in this material + if ( buildInfo.m_VertexCount ) + { + bAddedDecals = true; + + int decalIndexCount = buildInfo.m_pDecalMaterial->m_Indices.Count() - prevIndexCount; + Assert(decalIndexCount > 0); + + int decalIndex = AddDecalToMaterialList( buildInfo.m_pDecalMaterial ); + Decal_t& decal = buildInfo.m_pDecalMaterial->m_Decals[decalIndex]; + decal.m_VertexCount = buildInfo.m_VertexCount; + decal.m_IndexCount = decalIndexCount; + decal.m_FadeStartTime = fadeStartTime; + decal.m_FadeDuration = fadeDuration; + decal.m_Flags = flags; + + // Add this decal to the history... 
+ int h = list.m_pLod[i].m_DecalHistory.AddToTail(); + list.m_pLod[i].m_DecalHistory[h].m_Material = materialIdx; + list.m_pLod[i].m_DecalHistory[h].m_Decal = decalIndex; + list.m_pLod[i].m_DecalHistory[h].m_nId = m_nDecalId; + list.m_pLod[i].m_DecalHistory[h].m_nPad = 0; + } + } + + // Add to LRU + if ( bAddedDecals ) + { + DecalLRUListIndex_t h = m_DecalLRU.AddToTail(); + m_DecalLRU[h].m_nDecalId = m_nDecalId; + m_DecalLRU[h].m_hDecalHandle = hDecal; + + // Increment count. + ++m_nDecalId; + } + + if ( IsX360() ) + { + free( buildInfo.m_pMeshVertices ); + free( buildInfo.m_pVertexBuffer ); + } + + m_pStudioHdr = NULL; + m_pRC = NULL; + m_pBoneToWorld = NULL; +} + + +//----------------------------------------------------------------------------- +// +// This code here is all about rendering the decals +// +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// Inner loop for rendering decals that have a single bone +//----------------------------------------------------------------------------- + +void CStudioRender::DrawSingleBoneDecals( CMeshBuilder& meshBuilder, DecalMaterial_t& decalMaterial ) +{ + // We don't got no bones, so yummy yummy yum, just copy the data out + // Static props should go though this code path + + DecalVertexList_t& verts = decalMaterial.m_Vertices; + for ( DecalVertexList_t::IndexLocalType_t i = verts.Head(); i != verts.InvalidIndex(); i = verts.Next(i) ) + { + DecalVertex_t& vertex = verts[i]; + + meshBuilder.Position3fv( vertex.m_Position.Base() ); + meshBuilder.Normal3fv( GetVecNormal( vertex.m_Normal ).Base() ); +#if 0 + if ( decalMaterial.m_pMaterial->InMaterialPage() ) + { + float offset[2], scale[2]; + decalMaterial.m_pMaterial->GetMaterialOffset( offset ); + decalMaterial.m_pMaterial->GetMaterialScale( scale ); + + Vector2D vecTexCoord( vertex.m_TexCoord.x, vertex.m_TexCoord.y ); + vecTexCoord.x = clamp( vecTexCoord.x, 
0.0f, 1.0f ); + vecTexCoord.y = clamp( vecTexCoord.y, 0.0f, 1.0f ); + meshBuilder.TexCoordSubRect2f( 0, vecTexCoord.x, vecTexCoord.y, offset[0], offset[1], scale[0], scale[1] ); + +// meshBuilder.TexCoordSubRect2f( 0, vertex.m_TexCoord.x, vertex.m_TexCoord.y, offset[0], offset[1], scale[0], scale[1] ); + } + else +#endif + { + meshBuilder.TexCoord2fv( 0, GetVecTexCoord(vertex.m_TexCoord).Base() ); + } + meshBuilder.Color4ub( 255, 255, 255, 255 ); + + if ( meshBuilder.NumBoneWeights() > 0 ) // bone weight of 0 will not write anything, so these calls would be wasted + { + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + } + + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + + meshBuilder.AdvanceVertex(); + } +} + +void CStudioRender::DrawSingleBoneFlexedDecals( IMatRenderContext *pRenderContext, CMeshBuilder& meshBuilder, DecalMaterial_t& decalMaterial ) +{ + // We don't got no bones, so yummy yummy yum, just copy the data out + // Static props should go though this code path + DecalVertexList_t& verts = decalMaterial.m_Vertices; + for ( DecalVertexList_t::IndexLocalType_t i = verts.Head(); i != verts.InvalidIndex(); i = verts.Next(i) ) + { + DecalVertex_t& vertex = verts[i]; + + // Clipped verts shouldn't come through here, only static props should use clipped + Assert ( vertex.m_MeshVertexIndex >= 0 ); + + m_VertexCache.SetBodyModelMesh( vertex.m_Body, vertex.m_Model, vertex.m_Mesh ); + if (m_VertexCache.IsVertexFlexed( vertex.m_MeshVertexIndex )) + { + CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex( vertex.m_MeshVertexIndex ); + meshBuilder.Position3fv( pFlexedVertex->m_Position.Base() ); + meshBuilder.Normal3fv( pFlexedVertex->m_Normal.Base() ); + } + else + { + meshBuilder.Position3fv( vertex.m_Position.Base() ); + meshBuilder.Normal3fv( GetVecNormal( 
vertex.m_Normal ).Base() ); + } + +#if 0 + if ( decalMaterial.m_pMaterial->InMaterialPage() ) + { + float offset[2], scale[2]; + decalMaterial.m_pMaterial->GetMaterialOffset( offset ); + decalMaterial.m_pMaterial->GetMaterialScale( scale ); + + Vector2D vecTexCoord( vertex.m_TexCoord.x, vertex.m_TexCoord.y ); + vecTexCoord.x = clamp( vecTexCoord.x, 0.0f, 1.0f ); + vecTexCoord.y = clamp( vecTexCoord.y, 0.0f, 1.0f ); + meshBuilder.TexCoordSubRect2f( 0, vecTexCoord.x, vecTexCoord.y, offset[0], offset[1], scale[0], scale[1] ); + +// meshBuilder.TexCoordSubRect2f( 0, vertex.m_TexCoord.x, vertex.m_TexCoord.y, offset[0], offset[1], scale[0], scale[1] ); + } + else +#endif + { + meshBuilder.TexCoord2fv( 0, GetVecTexCoord(vertex.m_TexCoord).Base() ); + } + + meshBuilder.Color4ub( 255, 255, 255, 255 ); + + if ( meshBuilder.NumBoneWeights() > 0 ) // bone weight of 0 will not write anything, so these calls would be wasted + { + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + } + + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + + meshBuilder.AdvanceVertex(); + } +} + +//----------------------------------------------------------------------------- +// Inner loop for rendering decals that have multiple bones +//----------------------------------------------------------------------------- +bool CStudioRender::DrawMultiBoneDecals( CMeshBuilder& meshBuilder, DecalMaterial_t& decalMaterial, studiohdr_t *pStudioHdr ) +{ + const thinModelVertices_t *thinVertData = NULL; + const mstudio_meshvertexdata_t *vertData = NULL; + mstudiomesh_t *pLastMesh = NULL; + + DecalVertexList_t& verts = decalMaterial.m_Vertices; + for ( DecalVertexList_t::IndexLocalType_t i = verts.Head(); i != verts.InvalidIndex(); i = verts.Next(i) ) + { + DecalVertex_t& vertex = verts[i]; + + int n = vertex.m_MeshVertexIndex; + + 
Assert( n < MAXSTUDIOVERTS ); + + mstudiomesh_t *pMesh = vertex.GetMesh( pStudioHdr ); + Assert( pMesh ); + + m_VertexCache.SetBodyModelMesh( vertex.m_Body, vertex.m_Model, vertex.m_Mesh ); + if (m_VertexCache.IsVertexPositionCached( n )) + { + CachedPosNorm_t* pCachedVert = m_VertexCache.GetWorldVertex( n ); + meshBuilder.Position3fv( pCachedVert->m_Position.Base() ); + meshBuilder.Normal3fv( pCachedVert->m_Normal.Base() ); + } + else + { + // Prevent the computation of this again.... + m_VertexCache.SetupComputation(pMesh); + CachedPosNorm_t* pCachedVert = m_VertexCache.CreateWorldVertex( n ); + + if ( pLastMesh != pMesh ) + { + // only if the mesh changes + pLastMesh = pMesh; + vertData = pMesh->GetVertexData( pStudioHdr ); + if ( vertData ) + thinVertData = NULL; + else + thinVertData = pMesh->GetThinVertexData( pStudioHdr ); + } + + if ( vertData ) + { + mstudioboneweight_t* pBoneWeights = vertData->BoneWeights( n ); + // FIXME: could be faster to blend the matrices and then transform the pos+norm by the same matrix + R_StudioTransform( *vertData->Position( n ), pBoneWeights, pCachedVert->m_Position.AsVector3D() ); + R_StudioRotate( *vertData->Normal( n ), pBoneWeights, pCachedVert->m_Normal.AsVector3D() ); + } + else if ( thinVertData ) + { + // Using compressed vertex data + mstudioboneweight_t boneWeights; + Vector position; + Vector normal; + thinVertData->GetMeshBoneWeights( pMesh, n, &boneWeights ); + thinVertData->GetMeshPosition( pMesh, n, &position ); + thinVertData->GetMeshNormal( pMesh, n, &normal ); + R_StudioTransform( position, &boneWeights, pCachedVert->m_Position.AsVector3D() ); + R_StudioRotate( normal, &boneWeights, pCachedVert->m_Normal.AsVector3D() ); + } + else + { + return false; + } + + // Add a little extra offset for hardware skinning; in that case + // we're doing software skinning for decals and it might not be quite right + VectorMA( pCachedVert->m_Position.AsVector3D(), 0.1, pCachedVert->m_Normal.AsVector3D(), 
pCachedVert->m_Position.AsVector3D() ); + + meshBuilder.Position3fv( pCachedVert->m_Position.Base() ); + meshBuilder.Normal3fv( pCachedVert->m_Normal.Base() ); + } + +#if 0 + if ( decalMaterial.m_pMaterial->InMaterialPage() ) + { + float offset[2], scale[2]; + decalMaterial.m_pMaterial->GetMaterialOffset( offset ); + decalMaterial.m_pMaterial->GetMaterialScale( scale ); + + Vector2D vecTexCoord( vertex.m_TexCoord.x, vertex.m_TexCoord.y ); + vecTexCoord.x = clamp( vecTexCoord.x, 0.0f, 1.0f ); + vecTexCoord.y = clamp( vecTexCoord.y, 0.0f, 1.0f ); + meshBuilder.TexCoordSubRect2f( 0, vecTexCoord.x, vecTexCoord.y, offset[0], offset[1], scale[0], scale[1] ); + +// meshBuilder.TexCoordSubRect2f( 0, vertex.m_TexCoord.x, vertex.m_TexCoord.y, offset[0], offset[1], scale[0], scale[1] ); + } + else +#endif + { + meshBuilder.TexCoord2fv( 0, GetVecTexCoord(vertex.m_TexCoord).Base() ); + } + + meshBuilder.Color4ub( 255, 255, 255, 255 ); + + if ( meshBuilder.NumBoneWeights() > 0 ) // bone weight of 0 will not write anything, so these calls would be wasted + { + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + } + + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + + meshBuilder.AdvanceVertex(); + } + return true; +} + +bool CStudioRender::DrawMultiBoneFlexedDecals( IMatRenderContext *pRenderContext, CMeshBuilder& meshBuilder, + DecalMaterial_t& decalMaterial, studiohdr_t *pStudioHdr, studioloddata_t *pStudioLOD ) +{ + int *pBoneRemap = pStudioLOD ? 
pStudioLOD->m_pHWMorphDecalBoneRemap : NULL; + + mstudiomesh_t *pLastMesh = NULL; + const mstudio_meshvertexdata_t *vertData = NULL; + + DecalVertexList_t& verts = decalMaterial.m_Vertices; + for ( DecalVertexList_t::IndexLocalType_t i = verts.Head(); i != verts.InvalidIndex(); i = verts.Next(i) ) + { + DecalVertex_t& vertex = verts[i]; + + int n = vertex.m_MeshVertexIndex; + + mstudiomesh_t *pMesh = vertex.GetMesh( pStudioHdr ); + Assert( pMesh ); + + if ( pLastMesh != pMesh ) + { + // only if the mesh changes + pLastMesh = pMesh; + vertData = pMesh->GetVertexData( pStudioHdr ); + } + + if ( !vertData ) + return false; + + IMorph *pMorph = pBoneRemap ? vertex.GetMorph( m_pStudioHdr, m_pStudioMeshes ) : NULL; + Vector2D morphUV; + if ( pMorph ) + { + Assert( pBoneRemap ); + Assert( vertex.m_GroupIndex != 0xFFFF ); + if ( !pRenderContext->GetMorphAccumulatorTexCoord( &morphUV, pMorph, vertex.m_GroupIndex ) ) + { + pMorph = NULL; + } + } + + if ( !pMorph ) + { + mstudioboneweight_t* pBoneWeights = vertData->BoneWeights( n ); + m_VertexCache.SetBodyModelMesh( vertex.m_Body, vertex.m_Model, vertex.m_Mesh ); + + if ( m_VertexCache.IsVertexPositionCached( n ) ) + { + CachedPosNorm_t* pCachedVert = m_VertexCache.GetWorldVertex( n ); + meshBuilder.Position3fv( pCachedVert->m_Position.Base() ); + meshBuilder.Normal3fv( pCachedVert->m_Normal.Base() ); + } + else + { + // Prevent the computation of this again.... 
+ m_VertexCache.SetupComputation(pMesh); + CachedPosNorm_t* pCachedVert = m_VertexCache.CreateWorldVertex( n ); + + if (m_VertexCache.IsThinVertexFlexed( n )) + { + CachedPosNorm_t* pFlexedVertex = m_VertexCache.GetThinFlexVertex( n ); + Vector vecPosition, vecNormal; + VectorAdd( *vertData->Position( n ), pFlexedVertex->m_Position.AsVector3D(), vecPosition ); + VectorAdd( *vertData->Normal( n ), pFlexedVertex->m_Normal.AsVector3D(), vecNormal ); + R_StudioTransform( vecPosition, pBoneWeights, pCachedVert->m_Position.AsVector3D() ); + R_StudioRotate( vecNormal, pBoneWeights, pCachedVert->m_Normal.AsVector3D() ); + VectorNormalize( pCachedVert->m_Normal.AsVector3D() ); + } + else if (m_VertexCache.IsVertexFlexed( n )) + { + CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex( n ); + R_StudioTransform( pFlexedVertex->m_Position, pBoneWeights, pCachedVert->m_Position.AsVector3D() ); + R_StudioRotate( pFlexedVertex->m_Normal, pBoneWeights, pCachedVert->m_Normal.AsVector3D() ); + } + else + { + Assert( pMesh ); + R_StudioTransform( *vertData->Position( n ), pBoneWeights, pCachedVert->m_Position.AsVector3D() ); + R_StudioRotate( *vertData->Normal( n ), pBoneWeights, pCachedVert->m_Normal.AsVector3D() ); + } + + // Add a little extra offset for hardware skinning; in that case + // we're doing software skinning for decals and it might not be quite right + VectorMA( pCachedVert->m_Position.AsVector3D(), 0.1, pCachedVert->m_Normal.AsVector3D(), pCachedVert->m_Position.AsVector3D() ); + + meshBuilder.Position3fv( pCachedVert->m_Position.Base() ); + meshBuilder.Normal3fv( pCachedVert->m_Normal.Base() ); + } + + meshBuilder.Color4ub( 255, 255, 255, 255 ); + meshBuilder.TexCoord2fv( 0, GetVecTexCoord( vertex.m_TexCoord ).Base() ); + meshBuilder.TexCoord3f( 2, 0.0f, 0.0f, 0.0f ); + + // NOTE: Even if HW morphing is active, since we're using bone 0, it will multiply by identity in the shader + if ( meshBuilder.NumBoneWeights() > 0 ) // bone weight of 0 will not write 
anything, so these calls would be wasted + { + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + } + + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + } + else + { + meshBuilder.Position3fv( vertData->Position( n )->Base() ); + meshBuilder.Normal3fv( vertData->Normal( n )->Base() ); + meshBuilder.Color4ub( 255, 255, 255, 255 ); + meshBuilder.TexCoord2fv( 0, GetVecTexCoord( vertex.m_TexCoord ).Base() ); + meshBuilder.TexCoord3f( 2, morphUV.x, morphUV.y, 1.0f ); + + // NOTE: We should be renormalizing bone weights here like R_AddVertexToMesh does.. + // It's too expensive. Tough noogies. + mstudioboneweight_t* pBoneWeights = vertData->BoneWeights( n ); + Assert( pBoneWeights->numbones <= 3 ); + meshBuilder.BoneWeight( 0, pBoneWeights->weight[ 0 ] ); + meshBuilder.BoneWeight( 1, pBoneWeights->weight[ 1 ] ); + meshBuilder.BoneWeight( 2, 1.0f - pBoneWeights->weight[ 1 ] - pBoneWeights->weight[ 0 ] ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, pBoneRemap[ (unsigned)pBoneWeights->bone[0] ] ); + meshBuilder.BoneMatrix( 1, pBoneRemap[ (unsigned)pBoneWeights->bone[1] ] ); + meshBuilder.BoneMatrix( 2, pBoneRemap[ (unsigned)pBoneWeights->bone[2] ] ); + meshBuilder.BoneMatrix( 3, BONE_MATRIX_INDEX_INVALID ); + } + + meshBuilder.AdvanceVertex(); + } + return true; +}

//-----------------------------------------------------------------------------
// Draws all the decals using a particular material
//
// pRenderContext - render context used for binding and the dynamic mesh
// decalMaterial  - vertices, indices and decal list to draw
// pStudioHdr     - header of the model being drawn; selects the single-bone or
//                  multi-bone path. When NULL, m_pStudioHdr is used instead.
// pStudioLOD     - LOD data for hardware morphing; may be NULL, and is ignored
//                  when hardware morphing is disabled in the config
//-----------------------------------------------------------------------------
void CStudioRender::DrawDecalMaterial( IMatRenderContext *pRenderContext, DecalMaterial_t& decalMaterial, studiohdr_t *pStudioHdr, studioloddata_t *pStudioLOD )
{
	// Performance analysis.
//	VPROF_BUDGET( "Decals", "Decals" );
	VPROF( "DecalsDrawStudio" );

	// It's possible for the index count to become zero due to decal retirement
	int indexCount = decalMaterial.m_Indices.Count();
	if ( indexCount == 0 )
		return;

	// Decide the code path from the header the caller handed us. Not every
	// caller sets m_pStudioHdr before drawing (DrawStaticPropDecals does not),
	// so the member is only a fallback for callers that pass NULL.
	const studiohdr_t *pHdr = pStudioHdr ? pStudioHdr : m_pStudioHdr;

	if ( !m_pRC->m_Config.m_bEnableHWMorph )
	{
		pStudioLOD = NULL;
	}

	bool bUseHWMorphing = ( pStudioLOD && ( pStudioLOD->m_pHWMorphDecalBoneRemap != NULL ) );
	if ( bUseHWMorphing )
	{
		pRenderContext->BindMorph( MATERIAL_MORPH_DECAL );
	}

	// Bind the decal material
	if ( !m_pRC->m_Config.bWireframeDecals )
	{
		pRenderContext->Bind( decalMaterial.m_pMaterial );
	}
	else
	{
		pRenderContext->Bind( m_pMaterialMRMWireframe );
	}

	// Use a dynamic mesh...
	IMesh* pMesh = pRenderContext->GetDynamicMesh();

	int vertexCount = decalMaterial.m_Vertices.Count();

	CMeshBuilder meshBuilder;
	meshBuilder.Begin( pMesh, MATERIAL_TRIANGLES, vertexCount, indexCount );

	// FIXME: Could make static meshes for these?
	// But don't make no static meshes for decals that fade, at least

	// Two possibilities: no/one bones, we let the hardware do all transformation
	// or, more than one bone, we do software skinning.
	bool bDraw = true;
	if ( pHdr->numbones <= 1 )
	{
		if ( pHdr->numflexdesc != 0 )
		{
			DrawSingleBoneFlexedDecals( pRenderContext, meshBuilder, decalMaterial );
		}
		else
		{
			DrawSingleBoneDecals( meshBuilder, decalMaterial );
		}
	}
	else
	{
		if ( pHdr->numflexdesc != 0 )
		{
			if ( !DrawMultiBoneFlexedDecals( pRenderContext, meshBuilder, decalMaterial, pStudioHdr, pStudioLOD ) )
			{
				bDraw = false;
			}
		}
		else
		{
			if ( !DrawMultiBoneDecals( meshBuilder, decalMaterial, pStudioHdr ) )
			{
				bDraw = false;
			}
		}
	}

	// Set the indices
	// This is a little tricky. Because we can retire decals, the indices
	// for each decal start at 0. We output all the vertices in order of
	// each decal, and then fix up the indices based on how many vertices
	// we wrote out for the decals
	unsigned short decal = decalMaterial.m_Decals.Head();
	int indicesRemaining = decalMaterial.m_Decals[decal].m_IndexCount;
	int vertexOffset = 0;
	for ( int i = 0; i < indexCount; ++i)
	{
		meshBuilder.Index( decalMaterial.m_Indices[i] + vertexOffset );
		meshBuilder.AdvanceIndex();
		if (--indicesRemaining <= 0)
		{
			// Finished this decal's indices: later indices are relative to the next decal's first vertex
			vertexOffset += decalMaterial.m_Decals[decal].m_VertexCount;
			decal = decalMaterial.m_Decals.Next(decal);
			if (decal != decalMaterial.m_Decals.InvalidIndex())
			{
				indicesRemaining = decalMaterial.m_Decals[decal].m_IndexCount;
			}
#ifdef _DEBUG
			else
			{
				Assert( i + 1 == indexCount );
			}
#endif
		}
	}

	meshBuilder.End();
	if ( bDraw )
	{
		pMesh->Draw();
	}
	else
	{
		// A multi-bone helper reported failure; mark the mesh as drawn instead of drawing it
		pMesh->MarkAsDrawn();
	}

	if ( bUseHWMorphing )
	{
		pRenderContext->BindMorph( NULL );
	}
}


//-----------------------------------------------------------------------------
// Purpose: Setup the render state for decals if object has lighting baked.
//-----------------------------------------------------------------------------
// Six white entries; PreDrawDecal uses them as the ambient colors when the
// config has fullbright set.
static Vector s_pWhite[6] =
{
	Vector( 1.0, 1.0, 1.0 ),
	Vector( 1.0, 1.0, 1.0 ),
	Vector( 1.0, 1.0, 1.0 ),
	Vector( 1.0, 1.0, 1.0 ),
	Vector( 1.0, 1.0, 1.0 ),
	Vector( 1.0, 1.0, 1.0 )
};

+bool CStudioRender::PreDrawDecal( IMatRenderContext *pRenderContext, const DrawModelInfo_t &drawInfo ) +{ + if ( !drawInfo.m_bStaticLighting ) + return false; + + // FIXME: This is incredibly bogus, + // it's overwriting lighting state in the context without restoring it!
+ const Vector *pAmbient; + if ( m_pRC->m_Config.fullbright ) + { + pAmbient = s_pWhite; + m_pRC->m_NumLocalLights = 0; + } + else + { + pAmbient = drawInfo.m_vecAmbientCube; + m_pRC->m_NumLocalLights = CopyLocalLightingState( MAXLOCALLIGHTS, m_pRC->m_LocalLights, + drawInfo.m_nLocalLightCount, drawInfo.m_LocalLightDescs ); + } + + for( int i = 0; i < 6; i++ ) + { + VectorCopy( pAmbient[i], m_pRC->m_LightBoxColors[i].AsVector3D() ); + m_pRC->m_LightBoxColors[i][3] = 1.0f; + } + + SetLightingRenderState(); + return true; +} + + +//----------------------------------------------------------------------------- +// Draws all the decals on a particular model +//----------------------------------------------------------------------------- +void CStudioRender::DrawDecal( const DrawModelInfo_t &drawInfo, int lod, int body ) +{ + StudioDecalHandle_t handle = drawInfo.m_Decals; + if ( handle == STUDIORENDER_DECAL_INVALID ) + return; + + VPROF("CStudioRender::DrawDecal"); + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + PreDrawDecal( pRenderContext, drawInfo ); + + // All decal vertex data is are stored in pose space + // So as long as the pose-to-world transforms are set, we're all ready! + + // FIXME: Body stuff isn't hooked in at all for decals + + // Get the decal list for this lod + const DecalModelList_t& list = m_DecalList[(int)handle]; + m_pStudioHdr = drawInfo.m_pStudioHdr; + + // Add this fix after I fix the other problem. 
+ studioloddata_t *pStudioLOD = NULL; + if ( m_pStudioHdr->numbones <= 1 ) + { + pRenderContext->SetNumBoneWeights( m_pStudioHdr->numbones ); + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadMatrix( m_PoseToWorld[0] ); + } + else + { + pStudioLOD = &drawInfo.m_pHardwareData->m_pLODs[lod]; + if ( !m_pRC->m_Config.m_bEnableHWMorph || !pStudioLOD->m_pHWMorphDecalBoneRemap ) + { + pRenderContext->SetNumBoneWeights( 0 ); + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadIdentity( ); + } + else + { + // Set up skinning for decal rendering with hw morphs + pRenderContext->SetNumBoneWeights( pStudioLOD->m_nDecalBoneCount ); + + // Bone 0 is always identity; necessary to multiple against non hw-morphed verts + matrix3x4_t identity; + SetIdentityMatrix( identity ); + pRenderContext->LoadBoneMatrix( 0, identity ); + + // Set up the bone state from the mapping computed in ComputeHWMorphDecalBoneRemap + for ( int i = 0; i < m_pStudioHdr->numbones; ++i ) + { + int nHWBone = pStudioLOD->m_pHWMorphDecalBoneRemap[i]; + if ( nHWBone <= 0 ) + continue; + + pRenderContext->LoadBoneMatrix( nHWBone, m_PoseToWorld[i] ); + } + } + } + + // Gotta do this for all LODs + // Draw each set of decals using a particular material + unsigned short mat = list.m_pLod[lod].m_FirstMaterial; + for ( ; mat != m_DecalMaterial.InvalidIndex(); mat = m_DecalMaterial.Next(mat)) + { + DecalMaterial_t& decalMaterial = m_DecalMaterial[mat]; + DrawDecalMaterial( pRenderContext, decalMaterial, m_pStudioHdr, pStudioLOD ); + } +} + + +//----------------------------------------------------------------------------- +// Draws the decals attached to a static prop. +// Returns immediately when drawInfo.m_Decals is STUDIORENDER_DECAL_INVALID. +// Otherwise rc is installed as m_pRC for the duration of the call (and m_pRC is +// reset to NULL before returning), modelToWorld is loaded as the MATERIAL_MODEL +// matrix, and every decal material of LOD drawInfo.m_Lod is drawn through +// DrawDecalMaterial with no per-LOD hardware data (NULL). +//----------------------------------------------------------------------------- +void CStudioRender::DrawStaticPropDecals( const DrawModelInfo_t &drawInfo, const StudioRenderContext_t &rc, const matrix3x4_t &modelToWorld ) +{ + StudioDecalHandle_t handle = drawInfo.m_Decals; + if (handle == STUDIORENDER_DECAL_INVALID) + return; + + m_pRC = const_cast< StudioRenderContext_t* >( &rc ); + + VPROF("CStudioRender::DrawStaticPropDecals"); + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + PreDrawDecal( pRenderContext, 
drawInfo ); + + // All decal vertex data is stored in pose space + // So as long as the pose-to-world transforms are set, we're all ready! + + // FIXME: Body stuff isn't hooked in at all for decals + + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadMatrix( modelToWorld ); + + const DecalModelList_t& list = m_DecalList[(int)handle]; + // Gotta do this for all LODs + // Draw each set of decals using a particular material + unsigned short mat = list.m_pLod[drawInfo.m_Lod].m_FirstMaterial; + for ( ; mat != m_DecalMaterial.InvalidIndex(); mat = m_DecalMaterial.Next(mat)) + { + DecalMaterial_t& decalMaterial = m_DecalMaterial[mat]; + DrawDecalMaterial( pRenderContext, decalMaterial, drawInfo.m_pStudioHdr, NULL ); + } + + m_pRC = NULL; +} + diff --git a/studiorender/r_studiodraw.cpp b/studiorender/r_studiodraw.cpp new file mode 100644 index 0000000..5f3565e --- /dev/null +++ b/studiorender/r_studiodraw.cpp @@ -0,0 +1,2986 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//=====================================================================================// + +#include "studiorender.h" +#include "studio.h" +#include "materialsystem/imesh.h" +#include "materialsystem/imaterialsystemhardwareconfig.h" +#include "materialsystem/imaterialvar.h" +#include "materialsystem/imorph.h" +#include "materialsystem/itexture.h" +#include "materialsystem/imaterial.h" +#include "optimize.h" +#include "mathlib/mathlib.h" +#include "mathlib/vector.h" +#include <malloc.h> +#include "mathlib/vmatrix.h" +#include "studiorendercontext.h" +#include "tier2/tier2.h" +#include "tier0/vprof.h" + +//#define PROFILE_STUDIO VPROF +#define PROFILE_STUDIO + +// memdbgon must be the last include file in a .cpp file!!! 
+#include "tier0/memdbgon.h" + +typedef void (*SoftwareProcessMeshFunc_t)( const mstudio_meshvertexdata_t *, matrix3x4_t *pPoseToWorld, + CCachedRenderData &vertexCache, CMeshBuilder& meshBuilder, int numVertices, unsigned short* pGroupToMesh, unsigned int nAlphaMask, + IMaterial *pMaterial); + +//----------------------------------------------------------------------------- +// Forward declarations +//----------------------------------------------------------------------------- + +class IClientEntity; + + +static int boxpnt[6][4] = +{ + { 0, 4, 6, 2 }, // +X + { 0, 1, 5, 4 }, // +Y + { 0, 2, 3, 1 }, // +Z + { 7, 5, 1, 3 }, // -X + { 7, 3, 2, 6 }, // -Y + { 7, 6, 4, 5 }, // -Z +}; + +static TableVector hullcolor[8] = +{ + { 1.0, 1.0, 1.0 }, + { 1.0, 0.5, 0.5 }, + { 0.5, 1.0, 0.5 }, + { 1.0, 1.0, 0.5 }, + { 0.5, 0.5, 1.0 }, + { 1.0, 0.5, 1.0 }, + { 0.5, 1.0, 1.0 }, + { 1.0, 1.0, 1.0 } +}; + + +//----------------------------------------------------------------------------- +// +//----------------------------------------------------------------------------- +static unsigned int s_nTranslucentModelHullCache = 0; +static unsigned int s_nSolidModelHullCache = 0; +void CStudioRender::R_StudioDrawHulls( int hitboxset, bool translucent ) +{ + int i, j; +// float lv; + Vector tmp; + Vector p[8]; + mstudiobbox_t *pbbox; + IMaterialVar *colorVar; + + mstudiohitboxset_t *s = m_pStudioHdr->pHitboxSet( hitboxset ); + if ( !s ) + return; + + pbbox = s->pHitbox( 0 ); + if ( !pbbox ) + return; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + if( translucent ) + { + pRenderContext->Bind( m_pMaterialTranslucentModelHulls ); + colorVar = m_pMaterialTranslucentModelHulls->FindVarFast( "$color", &s_nTranslucentModelHullCache ); + } + else + { + pRenderContext->Bind( m_pMaterialSolidModelHulls ); + colorVar = m_pMaterialSolidModelHulls->FindVarFast( "$color", &s_nSolidModelHullCache ); + } + + + for (i = 0; i < s->numhitboxes; i++) + { + for (j = 0; j < 8; j++) + { + tmp[0] 
= (j & 1) ? pbbox[i].bbmin[0] : pbbox[i].bbmax[0]; + tmp[1] = (j & 2) ? pbbox[i].bbmin[1] : pbbox[i].bbmax[1]; + tmp[2] = (j & 4) ? pbbox[i].bbmin[2] : pbbox[i].bbmax[2]; + + VectorTransform( tmp, m_pBoneToWorld[pbbox[i].bone], p[j] ); + } + + j = (pbbox[i].group % 8); + g_pMaterialSystem->Flush(); + if( colorVar ) + { + if( translucent ) + { + colorVar->SetVecValue( 0.2f * hullcolor[j].x, 0.2f * hullcolor[j].y, 0.2f * hullcolor[j].z ); + } + else + { + colorVar->SetVecValue( hullcolor[j].x, hullcolor[j].y, hullcolor[j].z ); + } + } + for (j = 0; j < 6; j++) + { +#if 0 + tmp[0] = tmp[1] = tmp[2] = 0; + tmp[j % 3] = (j < 3) ? 1.0 : -1.0; + // R_StudioLighting( &lv, pbbox[i].bone, 0, tmp ); // BUG: not updated +#endif + + IMesh* pMesh = pRenderContext->GetDynamicMesh(); + CMeshBuilder meshBuilder; + meshBuilder.Begin( pMesh, MATERIAL_QUADS, 1 ); + + for (int k = 0; k < 4; ++k) + { + meshBuilder.Position3fv( p[boxpnt[j][k]].Base() ); + meshBuilder.AdvanceVertex(); + } + + meshBuilder.End(); + pMesh->Draw(); + } + } +} + + +void CStudioRender::R_StudioDrawBones (void) +{ + int i, j, k; +// float lv; + Vector tmp; + Vector p[8]; + Vector up, right, forward; + Vector a1; + mstudiobone_t *pbones; + Vector positionArray[4]; + + pbones = m_pStudioHdr->pBone( 0 ); + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + for (i = 0; i < m_pStudioHdr->numbones; i++) + { + if (pbones[i].parent == -1) + continue; + + k = pbones[i].parent; + + a1[0] = a1[1] = a1[2] = 1.0; + up[0] = m_pBoneToWorld[i][0][3] - m_pBoneToWorld[k][0][3]; + up[1] = m_pBoneToWorld[i][1][3] - m_pBoneToWorld[k][1][3]; + up[2] = m_pBoneToWorld[i][2][3] - m_pBoneToWorld[k][2][3]; + if (up[0] > up[1]) + if (up[0] > up[2]) + a1[0] = 0.0; + else + a1[2] = 0.0; + else + if (up[1] > up[2]) + a1[1] = 0.0; + else + a1[2] = 0.0; + CrossProduct( up, a1, right ); + VectorNormalize( right ); + CrossProduct( up, right, forward ); + VectorNormalize( forward ); + VectorScale( right, 2.0, right ); + VectorScale( 
forward, 2.0, forward ); + + for (j = 0; j < 8; j++) + { + p[j][0] = m_pBoneToWorld[k][0][3]; + p[j][1] = m_pBoneToWorld[k][1][3]; + p[j][2] = m_pBoneToWorld[k][2][3]; + + if (j & 1) + { + VectorSubtract( p[j], right, p[j] ); + } + else + { + VectorAdd( p[j], right, p[j] ); + } + + if (j & 2) + { + VectorSubtract( p[j], forward, p[j] ); + } + else + { + VectorAdd( p[j], forward, p[j] ); + } + + if (j & 4) + { + } + else + { + VectorAdd( p[j], up, p[j] ); + } + } + + VectorNormalize( up ); + VectorNormalize( right ); + VectorNormalize( forward ); + + pRenderContext->Bind( m_pMaterialModelBones ); + + for (j = 0; j < 6; j++) + { + switch( j) + { + case 0: VectorCopy( right, tmp ); break; + case 1: VectorCopy( forward, tmp ); break; + case 2: VectorCopy( up, tmp ); break; + case 3: VectorScale( right, -1, tmp ); break; + case 4: VectorScale( forward, -1, tmp ); break; + case 5: VectorScale( up, -1, tmp ); break; + } + // R_StudioLighting( &lv, -1, 0, tmp ); // BUG: not updated + + IMesh* pMesh = pRenderContext->GetDynamicMesh(); + CMeshBuilder meshBuilder; + meshBuilder.Begin( pMesh, MATERIAL_QUADS, 1 ); + + for (int k = 0; k < 4; ++k) + { + meshBuilder.Position3fv( p[boxpnt[j][k]].Base() ); + meshBuilder.AdvanceVertex(); + } + + meshBuilder.End(); + pMesh->Draw(); + } + } +} + + +int CStudioRender::R_StudioRenderModel( IMatRenderContext *pRenderContext, int skin, + int body, int hitboxset, void /*IClientEntity*/ *pEntity, + IMaterial **ppMaterials, int *pMaterialFlags, int flags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes ) +{ + VPROF("CStudioRender::R_StudioRenderModel"); + + int nDrawGroup = flags & STUDIORENDER_DRAW_GROUP_MASK; + + if ( m_pRC->m_Config.drawEntities == 2 ) + { + if ( nDrawGroup != STUDIORENDER_DRAW_TRANSLUCENT_ONLY ) + { + R_StudioDrawBones( ); + } + return 0; + } + + if ( m_pRC->m_Config.drawEntities == 3 ) + { + if ( nDrawGroup != STUDIORENDER_DRAW_TRANSLUCENT_ONLY ) + { + R_StudioDrawHulls( hitboxset, false ); + } + return 0; + } + + 
// BUG: This method is crap, though less crap than before. It should just sort + // the materials though it'll need to sort at render time as "skin" + // can change what materials a given mesh may use + int numTrianglesRendered = 0; + + // don't try to use these if not supported + if ( IsPC() && !g_pMaterialSystemHardwareConfig->SupportsColorOnSecondStream() ) + { + pColorMeshes = NULL; + } + + // Build list of submodels + BodyPartInfo_t *pBodyPartInfo = (BodyPartInfo_t*)_alloca( m_pStudioHdr->numbodyparts * sizeof(BodyPartInfo_t) ); + for ( int i=0 ; i < m_pStudioHdr->numbodyparts; ++i ) + { + pBodyPartInfo[i].m_nSubModelIndex = R_StudioSetupModel( i, body, &pBodyPartInfo[i].m_pSubModel, m_pStudioHdr ); + } + + // mark possible translucent meshes + if ( nDrawGroup != STUDIORENDER_DRAW_TRANSLUCENT_ONLY ) + { + // we're going to render the opaque meshes, so these will get counted in that pass + m_bSkippedMeshes = false; + m_bDrawTranslucentSubModels = false; + numTrianglesRendered += R_StudioRenderFinal( pRenderContext, skin, m_pStudioHdr->numbodyparts, pBodyPartInfo, + pEntity, ppMaterials, pMaterialFlags, boneMask, lod, pColorMeshes ); + } + else + { + m_bSkippedMeshes = true; + } + + if ( m_bSkippedMeshes && nDrawGroup != STUDIORENDER_DRAW_OPAQUE_ONLY ) + { + m_bDrawTranslucentSubModels = true; + numTrianglesRendered += R_StudioRenderFinal( pRenderContext, skin, m_pStudioHdr->numbodyparts, pBodyPartInfo, + pEntity, ppMaterials, pMaterialFlags, boneMask, lod, pColorMeshes ); + } + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Generate morph accumulator +//----------------------------------------------------------------------------- +void CStudioRender::GenerateMorphAccumulator( mstudiomodel_t *pSubModel ) +{ + // Deal with all flexes + // FIXME: HW Morphing doesn't work with translucent models yet + if ( !m_pRC->m_Config.m_bEnableHWMorph || !m_pRC->m_Config.bFlex || 
m_bDrawTranslucentSubModels || + !g_pMaterialSystemHardwareConfig->HasFastVertexTextures() ) + return; + + int nActiveMeshCount = 0; + mstudiomesh_t *ppMeshes[512]; + + // First, build the list of meshes that need morphing + for ( int i = 0; i < pSubModel->nummeshes; ++i ) + { + mstudiomesh_t *pMesh = pSubModel->pMesh(i); + studiomeshdata_t *pMeshData = &m_pStudioMeshes[pMesh->meshid]; + Assert( pMeshData ); + + int nFlexCount = pMesh->numflexes; + if ( !nFlexCount ) + continue; + + for ( int j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + bool bIsDeltaFlexed = (pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED) != 0; + if ( !bIsDeltaFlexed ) + continue; + + ppMeshes[nActiveMeshCount++] = pMesh; + Assert( nActiveMeshCount < 512 ); + break; + } + } + + if ( nActiveMeshCount == 0 ) + return; + + // HACK - Just turn off scissor for this model if it is doing morph accumulation + DisableScissor(); + + // Next, accumulate morphs for appropriate meshes + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + pRenderContext->BeginMorphAccumulation(); + for ( int i = 0; i < nActiveMeshCount; ++i ) + { + mstudiomesh_t *pMesh = ppMeshes[i]; + studiomeshdata_t *pMeshData = &m_pStudioMeshes[pMesh->meshid]; + + int nFlexCount = pMesh->numflexes; + MorphWeight_t *pWeights = (MorphWeight_t*)_alloca( nFlexCount * sizeof(MorphWeight_t) ); + ComputeFlexWeights( nFlexCount, pMesh->pFlex(0), pWeights ); + + for ( int j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + if ( !pGroup->m_pMorph ) + continue; + + pRenderContext->AccumulateMorph( pGroup->m_pMorph, nFlexCount, pWeights ); + } + } + pRenderContext->EndMorphAccumulation(); +} + + +//----------------------------------------------------------------------------- +// Computes eyeball state +//----------------------------------------------------------------------------- +void CStudioRender::ComputeEyelidStateFACS( 
mstudiomodel_t *pSubModel ) +{ + for ( int j = 0; j < pSubModel->numeyeballs; j++ ) + { + // FIXME: This might not be necessary... + R_StudioEyeballPosition( pSubModel->pEyeball( j ), &m_pEyeballState[ j ] ); + R_StudioEyelidFACS( pSubModel->pEyeball(j), &m_pEyeballState[j] ); + } +} + + +/* +================ +R_StudioRenderFinal +inputs: +outputs: returns the number of triangles rendered. +================ +*/ +int CStudioRender::R_StudioRenderFinal( IMatRenderContext *pRenderContext, + int skin, int nBodyPartCount, BodyPartInfo_t *pBodyPartInfo, void /*IClientEntity*/ *pClientEntity, + IMaterial **ppMaterials, int *pMaterialFlags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes ) +{ + VPROF("CStudioRender::R_StudioRenderFinal"); + + int numTrianglesRendered = 0; + + for ( int i=0 ; i < nBodyPartCount; i++ ) + { + m_pSubModel = pBodyPartInfo[i].m_pSubModel; + + // NOTE: This has to run here because it effects flex targets, + // so therefore it must happen prior to GenerateMorphAccumulator. 
+ ComputeEyelidStateFACS( m_pSubModel ); + GenerateMorphAccumulator( m_pSubModel ); + + // Set up SW flex + m_VertexCache.SetBodyPart( i ); + m_VertexCache.SetModel( pBodyPartInfo[i].m_nSubModelIndex ); + + numTrianglesRendered += R_StudioDrawPoints( pRenderContext, skin, pClientEntity, + ppMaterials, pMaterialFlags, boneMask, lod, pColorMeshes ); + } + return numTrianglesRendered; +} + +static ConVar r_flashlightscissor( "r_flashlightscissor", "1", 0 ); + +void CStudioRender::EnableScissor( FlashlightState_t *state ) +{ + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + // Only scissor into the backbuffer + if ( r_flashlightscissor.GetBool() && state->DoScissor() && ( pRenderContext->GetRenderTarget() == NULL ) ) + { + pRenderContext->SetScissorRect( state->GetLeft(), state->GetTop(), state->GetRight(), state->GetBottom(), true ); + } +} + +void CStudioRender::DisableScissor() +{ + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + // Scissor even if we're not shadow depth mapping + if ( r_flashlightscissor.GetBool() ) + { + pRenderContext->SetScissorRect( -1, -1, -1, -1, false ); + } +} + + +//----------------------------------------------------------------------------- +// Draw shadows +//----------------------------------------------------------------------------- +void CStudioRender::DrawShadows( const DrawModelInfo_t& info, int flags, int boneMask ) +{ + if ( !m_ShadowState.Count() ) + return; + + VPROF("CStudioRender::DrawShadows"); + + IMaterial* pForcedMat = m_pRC->m_pForcedMaterial; + OverrideType_t nForcedType = m_pRC->m_nForcedMaterialType; + + // Here, we have to redraw the model one time for each flashlight + // Having a material of NULL means that we are a light source. 
+ CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + pRenderContext->SetFlashlightMode( true ); + int i; + for (i = 0; i < m_ShadowState.Count(); ++i ) + { + if( !m_ShadowState[i].m_pMaterial ) + { + Assert( m_ShadowState[i].m_pFlashlightState && m_ShadowState[i].m_pWorldToTexture ); + pRenderContext->SetFlashlightStateEx( *m_ShadowState[i].m_pFlashlightState, *m_ShadowState[i].m_pWorldToTexture, m_ShadowState[i].m_pFlashlightDepthTexture ); + + EnableScissor( m_ShadowState[i].m_pFlashlightState ); + + R_StudioRenderModel( pRenderContext, info.m_Skin, info.m_Body, info.m_HitboxSet, info.m_pClientEntity, + info.m_pHardwareData->m_pLODs[info.m_Lod].ppMaterials, + info.m_pHardwareData->m_pLODs[info.m_Lod].pMaterialFlags, flags, boneMask, info.m_Lod, info.m_pColorMeshes ); + + DisableScissor(); + } + } + pRenderContext->SetFlashlightMode( false ); + + // Here, we have to redraw the model one time for each shadow + for (int i = 0; i < m_ShadowState.Count(); ++i ) + { + if( m_ShadowState[i].m_pMaterial ) + { + m_pRC->m_pForcedMaterial = m_ShadowState[i].m_pMaterial; + m_pRC->m_nForcedMaterialType = OVERRIDE_NORMAL; + R_StudioRenderModel( pRenderContext, 0, info.m_Body, 0, m_ShadowState[i].m_pProxyData, + NULL, NULL, flags, boneMask, info.m_Lod, NULL ); + } + } + + // Restore the previous forced material + m_pRC->m_pForcedMaterial = pForcedMat; + m_pRC->m_nForcedMaterialType = nForcedType; +} + +void CStudioRender::DrawStaticPropShadows( const DrawModelInfo_t &info, const StudioRenderContext_t &rc, const matrix3x4_t& rootToWorld, int flags ) +{ + memcpy( &m_StaticPropRootToWorld, &rootToWorld, sizeof(matrix3x4_t) ); + memcpy( &m_PoseToWorld[0], &rootToWorld, sizeof(matrix3x4_t) ); + + m_pRC = const_cast< StudioRenderContext_t* >( &rc ); + m_pBoneToWorld = &m_StaticPropRootToWorld; + m_pStudioHdr = info.m_pStudioHdr; + m_pStudioMeshes = info.m_pHardwareData->m_pLODs[info.m_Lod].m_pMeshData; + DrawShadows( info, flags, BONE_USED_BY_ANYTHING ); + m_pRC = NULL; + 
m_pBoneToWorld = NULL; +} + +// Draw flashlight lighting on decals. +void CStudioRender::DrawFlashlightDecals( const DrawModelInfo_t& info, int lod ) +{ + if ( !m_ShadowState.Count() ) + return; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + pRenderContext->SetFlashlightMode( true ); + int i; + for (i = 0; i < m_ShadowState.Count(); ++i ) + { + // This isn't clear. This means that this is a flashlight if the material is NULL. FLASHLIGHTFIXME + if( !m_ShadowState[i].m_pMaterial ) + { + Assert( m_ShadowState[i].m_pFlashlightState && m_ShadowState[i].m_pWorldToTexture ); + pRenderContext->SetFlashlightStateEx( *m_ShadowState[i].m_pFlashlightState, *m_ShadowState[i].m_pWorldToTexture, m_ShadowState[i].m_pFlashlightDepthTexture ); + + EnableScissor( m_ShadowState[i].m_pFlashlightState ); + + DrawDecal( info, lod, info.m_Body ); + + DisableScissor(); + } + } + pRenderContext->SetFlashlightMode( false ); +} + + +static matrix3x4_t *ComputeSkinMatrix( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &result ) +{ + float flWeight0, flWeight1, flWeight2; + + switch( boneweights.numbones ) + { + default: + case 1: + return &pPoseToWorld[(unsigned)boneweights.bone[0]]; + + case 2: + { + matrix3x4_t &boneMat0 = pPoseToWorld[(unsigned)boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[(unsigned)boneweights.bone[1]]; + flWeight0 = boneweights.weight[0]; + flWeight1 = boneweights.weight[1]; + + // NOTE: Inlining here seems to make a fair amount of difference + result[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1; + result[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1; + result[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1; + result[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1; + result[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1; + result[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1; + result[1][2] = boneMat0[1][2] * 
flWeight0 + boneMat1[1][2] * flWeight1; + result[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1; + result[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1; + result[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1; + result[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1; + result[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1; + } + return &result; + + case 3: + { + matrix3x4_t &boneMat0 = pPoseToWorld[(unsigned)boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[(unsigned)boneweights.bone[1]]; + matrix3x4_t &boneMat2 = pPoseToWorld[(unsigned)boneweights.bone[2]]; + flWeight0 = boneweights.weight[0]; + flWeight1 = boneweights.weight[1]; + flWeight2 = boneweights.weight[2]; + + result[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1 + boneMat2[0][0] * flWeight2; + result[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1 + boneMat2[0][1] * flWeight2; + result[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1 + boneMat2[0][2] * flWeight2; + result[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1 + boneMat2[0][3] * flWeight2; + result[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1 + boneMat2[1][0] * flWeight2; + result[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1 + boneMat2[1][1] * flWeight2; + result[1][2] = boneMat0[1][2] * flWeight0 + boneMat1[1][2] * flWeight1 + boneMat2[1][2] * flWeight2; + result[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1 + boneMat2[1][3] * flWeight2; + result[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1 + boneMat2[2][0] * flWeight2; + result[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1 + boneMat2[2][1] * flWeight2; + result[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1 + boneMat2[2][2] * flWeight2; + result[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1 + boneMat2[2][3] * 
flWeight2; + } + return &result; + + case 4: + Assert(0); +#if (MAX_NUM_BONES_PER_VERT > 3) + { + // Don't compile this if MAX_NUM_BONES_PER_VERT is too low + matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; + matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]]; + matrix3x4_t &boneMat3 = pPoseToWorld[boneweights.bone[3]]; + flWeight0 = boneweights.weight[0]; + flWeight1 = boneweights.weight[1]; + flWeight2 = boneweights.weight[2]; + float flWeight3 = boneweights.weight[3]; + + result[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1 + boneMat2[0][0] * flWeight2 + boneMat3[0][0] * flWeight3; + result[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1 + boneMat2[0][1] * flWeight2 + boneMat3[0][1] * flWeight3; + result[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1 + boneMat2[0][2] * flWeight2 + boneMat3[0][2] * flWeight3; + result[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1 + boneMat2[0][3] * flWeight2 + boneMat3[0][3] * flWeight3; + result[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1 + boneMat2[1][0] * flWeight2 + boneMat3[1][0] * flWeight3; + result[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1 + boneMat2[1][1] * flWeight2 + boneMat3[1][1] * flWeight3; + result[1][2] = boneMat0[1][2] * flWeight0 + boneMat1[1][2] * flWeight1 + boneMat2[1][2] * flWeight2 + boneMat3[1][2] * flWeight3; + result[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1 + boneMat2[1][3] * flWeight2 + boneMat3[1][3] * flWeight3; + result[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1 + boneMat2[2][0] * flWeight2 + boneMat3[2][0] * flWeight3; + result[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1 + boneMat2[2][1] * flWeight2 + boneMat3[2][1] * flWeight3; + result[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1 + boneMat2[2][2] * flWeight2 + boneMat3[2][2] * 
flWeight3; + result[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1 + boneMat2[2][3] * flWeight2 + boneMat3[2][3] * flWeight3; + } + return &result; +#endif + } + + Assert(0); + return NULL; +} + + +static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &result ) +{ + // NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization +#if defined( _WIN32 ) && !defined( _X360 ) + switch( boneweights.numbones ) + { + default: + case 1: + return &pPoseToWorld[boneweights.bone[0]]; + + case 2: + { + matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; + float *pWeights = boneweights.weight; + + _asm + { + mov eax, DWORD PTR [pWeights] + movss xmm6, dword ptr[eax] ; boneweights.weight[0] + movss xmm7, dword ptr[eax + 4] ; boneweights.weight[1] + + mov eax, DWORD PTR [boneMat0] + mov ecx, DWORD PTR [boneMat1] + mov edi, DWORD PTR [result] + + // Fill xmm6, and 7 with all the bone weights + shufps xmm6, xmm6, 0 + shufps xmm7, xmm7, 0 + + // Load up all rows of the three matrices + movaps xmm0, XMMWORD PTR [eax] + movaps xmm1, XMMWORD PTR [ecx] + movaps xmm2, XMMWORD PTR [eax + 16] + movaps xmm3, XMMWORD PTR [ecx + 16] + movaps xmm4, XMMWORD PTR [eax + 32] + movaps xmm5, XMMWORD PTR [ecx + 32] + + // Multiply the rows by the weights + mulps xmm0, xmm6 + mulps xmm1, xmm7 + mulps xmm2, xmm6 + mulps xmm3, xmm7 + mulps xmm4, xmm6 + mulps xmm5, xmm7 + + addps xmm0, xmm1 + addps xmm2, xmm3 + addps xmm4, xmm5 + + movaps XMMWORD PTR [edi], xmm0 + movaps XMMWORD PTR [edi + 16], xmm2 + movaps XMMWORD PTR [edi + 32], xmm4 + } + } + return &result; + + case 3: + { + matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; + matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]]; + float *pWeights = boneweights.weight; + + _asm + { + mov eax, DWORD PTR [pWeights] + 
movss xmm5, dword ptr[eax] ; boneweights.weight[0] + movss xmm6, dword ptr[eax + 4] ; boneweights.weight[1] + movss xmm7, dword ptr[eax + 8] ; boneweights.weight[2] + + mov eax, DWORD PTR [boneMat0] + mov ecx, DWORD PTR [boneMat1] + mov edx, DWORD PTR [boneMat2] + mov edi, DWORD PTR [result] + + // Fill xmm5, 6, and 7 with all the bone weights + shufps xmm5, xmm5, 0 + shufps xmm6, xmm6, 0 + shufps xmm7, xmm7, 0 + + // Load up the first row of the three matrices + movaps xmm0, XMMWORD PTR [eax] + movaps xmm1, XMMWORD PTR [ecx] + movaps xmm2, XMMWORD PTR [edx] + + // Multiply the rows by the weights + mulps xmm0, xmm5 + mulps xmm1, xmm6 + mulps xmm2, xmm7 + + addps xmm0, xmm1 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi], xmm0 + + // Load up the second row of the three matrices + movaps xmm0, XMMWORD PTR [eax + 16] + movaps xmm1, XMMWORD PTR [ecx + 16] + movaps xmm2, XMMWORD PTR [edx + 16] + + // Multiply the rows by the weights + mulps xmm0, xmm5 + mulps xmm1, xmm6 + mulps xmm2, xmm7 + + addps xmm0, xmm1 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi + 16], xmm0 + + // Load up the third row of the three matrices + movaps xmm0, XMMWORD PTR [eax + 32] + movaps xmm1, XMMWORD PTR [ecx + 32] + movaps xmm2, XMMWORD PTR [edx + 32] + + // Multiply the rows by the weights + mulps xmm0, xmm5 + mulps xmm1, xmm6 + mulps xmm2, xmm7 + + addps xmm0, xmm1 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi + 32], xmm0 + } + } + return &result; + + case 4: + Assert(0); +#if (MAX_NUM_BONES_PER_VERT > 3) + { + // Don't compile this if MAX_NUM_BONES_PER_VERT is too low + matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; + matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]]; + matrix3x4_t &boneMat3 = pPoseToWorld[boneweights.bone[3]]; + float *pWeights = boneweights.weight; + + _asm + { + mov eax, DWORD PTR [pWeights] + movss xmm4, dword ptr[eax] ; boneweights.weight[0] + movss xmm5, dword ptr[eax + 4] ; 
boneweights.weight[1] + movss xmm6, dword ptr[eax + 8] ; boneweights.weight[2] + movss xmm7, dword ptr[eax + 12] ; boneweights.weight[3] + + mov eax, DWORD PTR [boneMat0] + mov ecx, DWORD PTR [boneMat1] + mov edx, DWORD PTR [boneMat2] + mov esi, DWORD PTR [boneMat3] + mov edi, DWORD PTR [result] + + // Fill xmm5, 6, and 7 with all the bone weights + shufps xmm4, xmm4, 0 + shufps xmm5, xmm5, 0 + shufps xmm6, xmm6, 0 + shufps xmm7, xmm7, 0 + + // Load up the first row of the four matrices + movaps xmm0, XMMWORD PTR [eax] + movaps xmm1, XMMWORD PTR [ecx] + movaps xmm2, XMMWORD PTR [edx] + movaps xmm3, XMMWORD PTR [esi] + + // Multiply the rows by the weights + mulps xmm0, xmm4 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm3, xmm7 + + addps xmm0, xmm1 + addps xmm2, xmm3 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi], xmm0 + + // Load up the second row of the three matrices + movaps xmm0, XMMWORD PTR [eax + 16] + movaps xmm1, XMMWORD PTR [ecx + 16] + movaps xmm2, XMMWORD PTR [edx + 16] + movaps xmm3, XMMWORD PTR [esi + 16] + + // Multiply the rows by the weights + mulps xmm0, xmm4 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm3, xmm7 + + addps xmm0, xmm1 + addps xmm2, xmm3 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi + 16], xmm0 + + // Load up the third row of the three matrices + movaps xmm0, XMMWORD PTR [eax + 32] + movaps xmm1, XMMWORD PTR [ecx + 32] + movaps xmm2, XMMWORD PTR [edx + 32] + movaps xmm3, XMMWORD PTR [esi + 32] + + // Multiply the rows by the weights + mulps xmm0, xmm4 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm3, xmm7 + + addps xmm0, xmm1 + addps xmm2, xmm3 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi + 32], xmm0 + } + } + return &result; +#endif + } +#elif POSIX +#warning "ComputeSkinMatrixSSE C implementation only" + return ComputeSkinMatrix( boneweights, pPoseToWorld, result ); +#elif defined( _X360 ) + return ComputeSkinMatrix( boneweights, pPoseToWorld, result ); +#else + #error +#endif + + Assert( 0 ); + return NULL; +} + 
+//----------------------------------------------------------------------------- +// Designed for inter-module draw optimized calling, requires R_InitLightEffectWorld3() +// Compute the lighting at a point and normal +// Uses the set function pointer +// Final lighting is in gamma space +//----------------------------------------------------------------------------- +static lightpos_t lightpos[MAXLOCALLIGHTS]; +inline void CStudioRender::R_ComputeLightAtPoint3( const Vector &pos, const Vector &normal, Vector &color ) +{ + if ( m_pRC->m_Config.fullbright ) + { + color.Init( 1.0f, 1.0f, 1.0f ); + return; + } + + // Set up lightpos[i].dot, lightpos[i].falloff, and lightpos[i].delta for all lights + R_LightStrengthWorld( pos, m_pRC->m_NumLocalLights, m_pRC->m_LocalLights, lightpos ); + + // calculate ambient values from the ambient cube given a normal. + R_LightAmbient_4D( normal, m_pRC->m_LightBoxColors, color ); + + // Calculate color given lightpos_t lightpos, a normal, and the ambient + // color from the ambient cube calculated above. + Assert(R_LightEffectsWorld3); + R_LightEffectsWorld3( m_pRC->m_LocalLights, lightpos, normal, color ); +} + + +// define SPECIAL_SSE_MESH_PROCESSOR to enable code which contains a special optimized SSE lighting loop, significantly +// improving software vertex processing performace. +#if defined( _WIN32 ) && !defined( _X360 ) +#define SPECIAL_SSE_MESH_PROCESSOR +#endif + +#ifdef SPECIAL_SSE_MESH_PROCESSOR +//#define VERIFY_SSE_LIGHTING + +// false: MAX(0,L*N) true: .5*(L.N)+.5. set based on material +static bool SSELightingHalfLambert; + +// These variables are used by the special SSE lighting path. 
The +// lighting path calculates them every time it processes a mesh so there +// is no need to keep them in sync with changes to the other light variables +static fltx4 OneOver_ThetaDot_Minus_PhiDot[MAXLOCALLIGHTS]; // 1/(theta-phi) + +// Mouth term: scales each lane of 'light' by fIllum * max( 0, -(normal . forward) ) +void CStudioRender::R_MouthLighting( fltx4 fIllum, const FourVectors& normal, const FourVectors& forward, FourVectors &light ) +{ + fltx4 dot = SubSIMD(Four_Zeros,normal*forward); + dot=MaxSIMD(Four_Zeros,dot); + dot=MulSIMD(fIllum,dot); + light *= dot; +} + +// FourVectors counterpart of R_ComputeLightAtPoint3: starts from the ambient term, then adds every local light. +// Reads SSELightingHalfLambert and, for spot lights, OneOver_ThetaDot_Minus_PhiDot[i]. +// color is set to 1,1,1 in every lane when the fullbright config flag is on. +inline void CStudioRender::R_ComputeLightAtPoints3( const FourVectors &pos, const FourVectors &normal, FourVectors &color ) +{ + if ( m_pRC->m_Config.fullbright ) + { + color.DuplicateVector( Vector( 1.0f, 1.0f, 1.0f ) ); + return; + } + + R_LightAmbient_4D( normal, m_pRC->m_LightBoxColors, color ); + // now, add in contribution from all lights + for ( int i = 0; i < m_pRC->m_NumLocalLights; i++) + { + FourVectors delta; + LightDesc_t const *wl = m_pRC->m_LocalLights+i; + Assert((wl->m_Type==MATERIAL_LIGHT_POINT) || (wl->m_Type==MATERIAL_LIGHT_SPOT) || (wl->m_Type==MATERIAL_LIGHT_DIRECTIONAL)); + switch (wl->m_Type) + { + case MATERIAL_LIGHT_POINT: + case MATERIAL_LIGHT_SPOT: + delta.DuplicateVector(wl->m_Position); + delta-=pos; + break; + + case MATERIAL_LIGHT_DIRECTIONAL: + delta.DuplicateVector(wl->m_Direction); + delta*=-1.0; + break; + + } + fltx4 falloff = R_WorldLightDistanceFalloff( wl, delta); + delta.VectorNormalizeFast(); + fltx4 strength=delta*normal; + if (SSELightingHalfLambert) + { + strength=AddSIMD(MulSIMD(strength,Four_PointFives),Four_PointFives); + } + else + strength=MaxSIMD(Four_Zeros,delta*normal); + + switch(wl->m_Type) + { + case MATERIAL_LIGHT_POINT: + // no extra term for point lights + break; + + case MATERIAL_LIGHT_SPOT: + { + fltx4 dot2=SubSIMD(Four_Zeros,delta*wl->m_Direction); // dot position with spot light dir for cone falloff + + fltx4 cone_falloff_scale=MulSIMD(OneOver_ThetaDot_Minus_PhiDot[i], + SubSIMD(dot2,ReplicateX4(wl->m_PhiDot)));
+ cone_falloff_scale=MinSIMD(cone_falloff_scale,Four_Ones); + if ((wl->m_Falloff!=0.0) && (wl->m_Falloff!=1.0)) + { + // !!speed!! could compute integer exponent needed by powsimd and store in light + cone_falloff_scale=PowSIMD(cone_falloff_scale,wl->m_Falloff); + } + strength=MulSIMD(cone_falloff_scale,strength); + + // now, zero out lighting where dot2<phidot. This will mask out any invalid results + // from pow function, etc + fltx4 OutsideMask=CmpGtSIMD(dot2,ReplicateX4(wl->m_PhiDot)); // set where dot2>phidot; only those lanes keep their strength + strength=AndSIMD(OutsideMask,strength); + } + break; + + case MATERIAL_LIGHT_DIRECTIONAL: + break; + + } + strength=MulSIMD(strength,falloff); + color.x=AddSIMD(color.x,MulSIMD(strength,ReplicateX4(wl->m_Color.x))); + color.y=AddSIMD(color.y,MulSIMD(strength,ReplicateX4(wl->m_Color.y))); + color.z=AddSIMD(color.z,MulSIMD(strength,ReplicateX4(wl->m_Color.z))); + } +} + +#endif // SPECIAL_SSE_MESH_PROCESSOR + +//----------------------------------------------------------------------------- +// Optimized for low-end hardware +//----------------------------------------------------------------------------- +#pragma warning (disable:4701) + +// NOTE: I'm using this crazy wrapper because using straight template functions +// doesn't appear to work with function tables +template< int nHasTangentSpace, int nDoFlex, int nHasSIMD, int nLighting, int nDX8VertexFormat > +class CProcessMeshWrapper +{ +public: + static void R_PerformLighting( const Vector &forward, float fIllum, + const Vector &pos, const Vector &norm, unsigned int nAlphaMask, unsigned int *pColor ) + { + if ( nLighting == LIGHTING_SOFTWARE ) + { + Vector color; + g_StudioRender.R_ComputeLightAtPoint3( pos, norm, color ); + + unsigned char r = LinearToLightmap( color.x ); + unsigned char g = LinearToLightmap( color.y ); + unsigned char b = LinearToLightmap( color.z ); + + *pColor = b | (g << 8) | (r << 16) | nAlphaMask; + } + else if ( nLighting == LIGHTING_MOUTH ) + { + if ( fIllum != 0.0f ) + { + Vector
color; + g_StudioRender.R_ComputeLightAtPoint3( pos, norm, color ); + g_StudioRender.R_MouthLighting( fIllum, norm, forward, color ); + + unsigned char r = LinearToLightmap( color.x ); + unsigned char g = LinearToLightmap( color.y ); + unsigned char b = LinearToLightmap( color.z ); + + *pColor = b | (g << 8) | (r << 16) | nAlphaMask; + } + else + { + *pColor = nAlphaMask; + } + } + } + + static void R_TransformVert( const Vector *pSrcPos, const Vector *pSrcNorm, const Vector4D *pSrcTangentS, + matrix3x4_t *pSkinMat, VectorAligned &pos, Vector &norm, Vector4DAligned &tangentS ) + { + // NOTE: Could add SSE stuff here, if we knew what SSE stuff could make it faster + + pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3]; + norm.x = pSrcNorm->x * (*pSkinMat)[0][0] + pSrcNorm->y * (*pSkinMat)[0][1] + pSrcNorm->z * (*pSkinMat)[0][2]; + + pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3]; + norm.y = pSrcNorm->x * (*pSkinMat)[1][0] + pSrcNorm->y * (*pSkinMat)[1][1] + pSrcNorm->z * (*pSkinMat)[1][2]; + + pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3]; + norm.z = pSrcNorm->x * (*pSkinMat)[2][0] + pSrcNorm->y * (*pSkinMat)[2][1] + pSrcNorm->z * (*pSkinMat)[2][2]; + + if ( nHasTangentSpace ) + { + tangentS.x = pSrcTangentS->x * (*pSkinMat)[0][0] + pSrcTangentS->y * (*pSkinMat)[0][1] + pSrcTangentS->z * (*pSkinMat)[0][2]; + tangentS.y = pSrcTangentS->x * (*pSkinMat)[1][0] + pSrcTangentS->y * (*pSkinMat)[1][1] + pSrcTangentS->z * (*pSkinMat)[1][2]; + tangentS.z = pSrcTangentS->x * (*pSkinMat)[2][0] + pSrcTangentS->y * (*pSkinMat)[2][1] + pSrcTangentS->z * (*pSkinMat)[2][2]; + tangentS.w = pSrcTangentS->w; + } + } + + static void R_StudioSoftwareProcessMesh( const mstudio_meshvertexdata_t *vertData, matrix3x4_t *pPoseToWorld, + CCachedRenderData 
&vertexCache, CMeshBuilder& meshBuilder, int numVertices, unsigned short* pGroupToMesh, unsigned int nAlphaMask, + IMaterial* pMaterial) + { + Vector color; + Vector4D *pStudioTangentS; + Vector4DAligned tangentS; + Vector *pSrcPos; + Vector *pSrcNorm; + Vector4D *pSrcTangentS = NULL; + + ALIGN16 ModelVertexDX8_t dstVertex ALIGN16_POST; + dstVertex.m_flBoneWeights[0] = 1.0f; + dstVertex.m_flBoneWeights[1] = 0.0f; + dstVertex.m_nBoneIndices = 0; + dstVertex.m_nColor = 0xFFFFFFFF; + dstVertex.m_vecUserData.Init( 1.0f, 0.0f, 0.0f, 1.0f ); + + ALIGN16 matrix3x4_t temp ALIGN16_POST; + ALIGN16 matrix3x4_t *pSkinMat ALIGN16_POST; + + int ntemp[PREFETCH_VERT_COUNT]; + + Assert( numVertices > 0 ); + + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + if (nHasTangentSpace) + { + pStudioTangentS = vertData->TangentS( 0 ); + Assert( pStudioTangentS->w == -1.0f || pStudioTangentS->w == 1.0f ); + } + + // Mouth related stuff... + float fIllum = 1.0f; + Vector forward; + if (nLighting == LIGHTING_MOUTH) + { + g_StudioRender.R_MouthComputeLightingValues( fIllum, forward ); + } + + if ((nLighting == LIGHTING_MOUTH) || (nLighting == LIGHTING_SOFTWARE)) + { + g_StudioRender.R_InitLightEffectsWorld3(); + } +#ifdef _DEBUG + // In debug, clear it out to ensure we aren't accidentially calling + // the last setup for R_ComputeLightForPoint3. 
+ else + { + g_StudioRender.R_LightEffectsWorld3 = NULL; + } +#endif + +#if defined( _WIN32 ) && !defined( _X360 ) + if ( nHasSIMD ) + { + // Precaches the data + _mm_prefetch( (char*)((int)pGroupToMesh & (~0x1F)), _MM_HINT_NTA ); + } +#endif + for ( int i = 0; i < PREFETCH_VERT_COUNT; ++i ) + { + ntemp[i] = pGroupToMesh[i]; +#if defined( _WIN32 ) && !defined( _X360 ) + if ( nHasSIMD ) + { + char *pMem = (char*)&pVertices[ntemp[i]]; + _mm_prefetch( pMem, _MM_HINT_NTA ); + _mm_prefetch( pMem + 32, _MM_HINT_NTA ); + if ( nHasTangentSpace ) + { + _mm_prefetch( (char*)&pStudioTangentS[ntemp[i]], _MM_HINT_NTA ); + } + } +#endif + } + + int n, idx; + for ( int j=0; j < numVertices; ++j ) + { +#if defined( _WIN32 ) && !defined( _X360 ) + if ( nHasSIMD ) + { + char *pMem = (char*)&pGroupToMesh[j + PREFETCH_VERT_COUNT + 1]; + _mm_prefetch( (char*)((int)pMem & (~0x1F)), _MM_HINT_NTA ); + } +#endif + idx = j & (PREFETCH_VERT_COUNT-1); + n = ntemp[idx]; + + mstudiovertex_t &vert = pVertices[n]; + + ntemp[idx] = pGroupToMesh[j + PREFETCH_VERT_COUNT]; + + // Compute the skinning matrix + if ( nHasSIMD ) + { + pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, pPoseToWorld, temp ); + } + else + { + pSkinMat = ComputeSkinMatrix( vert.m_BoneWeights, pPoseToWorld, temp ); + } + + // transform into world space + if (nDoFlex && vertexCache.IsVertexFlexed(n)) + { + CachedPosNormTan_t* pFlexedVertex = vertexCache.GetFlexVertex(n); + pSrcPos = &pFlexedVertex->m_Position; + pSrcNorm = &pFlexedVertex->m_Normal; + + if (nHasTangentSpace) + { + pSrcTangentS = &pFlexedVertex->m_TangentS; + Assert( pSrcTangentS->w == -1.0f || pSrcTangentS->w == 1.0f ); + } + } + else + { + pSrcPos = &vert.m_vecPosition; + pSrcNorm = &vert.m_vecNormal; + + if (nHasTangentSpace) + { + pSrcTangentS = &pStudioTangentS[n]; + Assert( pSrcTangentS->w == -1.0f || pSrcTangentS->w == 1.0f ); + } + } + + // Transform the vert into world space + R_TransformVert( pSrcPos, pSrcNorm, pSrcTangentS, pSkinMat, + 
*(VectorAligned*)&dstVertex.m_vecPosition, dstVertex.m_vecNormal, *(Vector4DAligned*)&dstVertex.m_vecUserData ); + +#if defined( _WIN32 ) && !defined( _X360 ) + if ( nHasSIMD ) + { + _mm_prefetch( (char*)&pVertices[ntemp[idx]], _MM_HINT_NTA); + _mm_prefetch( (char*)&pVertices[ntemp[idx]] + 32, _MM_HINT_NTA ); + if ( nHasTangentSpace ) + { + _mm_prefetch( (char*)&pStudioTangentS[ntemp[idx]], _MM_HINT_NTA ); + } + } +#endif + // Compute lighting + R_PerformLighting( forward, fIllum, dstVertex.m_vecPosition, dstVertex.m_vecNormal, nAlphaMask, &dstVertex.m_nColor ); + + dstVertex.m_vecTexCoord = vert.m_vecTexCoord; + + if ( IsX360() || nDX8VertexFormat ) + { +#if !defined( _X360 ) + Assert( dstVertex.m_vecUserData.w == -1.0f || dstVertex.m_vecUserData.w == 1.0f ); + if ( nHasSIMD ) + { + meshBuilder.FastVertexSSE( dstVertex ); + } + else + { + meshBuilder.FastVertex( dstVertex ); + } +#else + meshBuilder.VertexDX8ToX360( dstVertex ); +#endif + } + else + { + if ( nHasSIMD ) + { + meshBuilder.FastVertexSSE( *(ModelVertexDX7_t*)&dstVertex ); + } + else + { + meshBuilder.FastVertex( *(ModelVertexDX7_t*)&dstVertex ); + } + } + } + meshBuilder.FastAdvanceNVertices( numVertices ); + } + +#ifdef SPECIAL_SSE_MESH_PROCESSOR + +#ifdef VERIFY_SSE_LIGHTING + static int NotCloseEnough( float a, float b ) + { + // check if 2 linear lighting values are close enough between the sse and non see lighting model + // no point being more precise than 1% since it all maps to 8 bit anyway + float thresh=0.1f*fabs( a ); + if ( thresh < 0.1f ) + thresh = 0.1f; + return ( fabs( a-b ) > thresh ); + } +#endif + + // this special version of the vertex processor does 4 vertices at once, so that they can be lit using SSE instructions. 
This provides + // a >2x speedup in the lit case + static void R_PerformVectorizedLightingSSE( const FourVectors &forward, fltx4 fIllum, ModelVertexDX8_t *dst, unsigned int nAlphaMask) + { + if ( nLighting == LIGHTING_SOFTWARE ) + { +#ifdef VERIFY_SSE_LIGHTING +// if ( (g_StudioRender.m_NumLocalLights==1) && +// ( (g_StudioRender.m_LocalLights[0].m_Type==MATERIAL_LIGHT_SPOT))) +// { +// // ihvtest doesn't use different exponents for its spots, +// // so i mess with the exponents when testing +// static int ctr=0; +// static float exps[8]={0,1,2,3,4,4.5,5.25,2.5}; +// ctr=(ctr+1)&7; +// g_StudioRender.m_LocalLights[0].m_Falloff=exps[ctr]; +// } +#endif + FourVectors Position; + Position.LoadAndSwizzleAligned(dst[0].m_vecPosition,dst[1].m_vecPosition,dst[2].m_vecPosition,dst[3].m_vecPosition); + FourVectors Normal(dst[0].m_vecNormal,dst[1].m_vecNormal,dst[2].m_vecNormal,dst[3].m_vecNormal); + FourVectors Color; + g_StudioRender.R_ComputeLightAtPoints3( Position, Normal, Color); + + for (int i=0; i<4; i++) + { + Vector color; +#ifdef VERIFY_SSE_LIGHTING + // debug - check sse version against "real" version + g_StudioRender.R_ComputeLightAtPoint3( dst[i].m_vecPosition,dst[i].m_vecNormal, color ); + if ( NotCloseEnough(color.x,Color.X(i)) || + NotCloseEnough(color.y,Color.Y(i)) || + NotCloseEnough(color.z,Color.Z(i))) + { + Assert(0); + // recompute so can step in debugger + g_StudioRender.R_ComputeLightAtPoints3( Position,Normal,Color); + g_StudioRender.R_ComputeLightAtPoint3( dst[i].m_vecPosition,dst[i].m_vecNormal, color ); + } +#endif + unsigned char r = LinearToLightmap( Color.X(i) ); + unsigned char g = LinearToLightmap( Color.Y(i) ); + unsigned char b = LinearToLightmap( Color.Z(i) ); + + dst[i].m_nColor = b | (g << 8) | (r << 16) | nAlphaMask; + } + } + else if ( nLighting == LIGHTING_MOUTH ) + { + FourVectors Position; + Position.LoadAndSwizzleAligned(dst[0].m_vecPosition,dst[1].m_vecPosition,dst[2].m_vecPosition,dst[3].m_vecPosition); + FourVectors 
Normal(dst[0].m_vecNormal,dst[1].m_vecNormal,dst[2].m_vecNormal,dst[3].m_vecNormal); + FourVectors Color; + + g_StudioRender.R_ComputeLightAtPoints3( Position, Normal, Color); + g_StudioRender.R_MouthLighting( fIllum, Normal, forward, Color ); + for (int i=0; i<4; i++) + { + unsigned char r = LinearToLightmap( Color.X(i) ); + unsigned char g = LinearToLightmap( Color.Y(i) ); + unsigned char b = LinearToLightmap( Color.Z(i) ); + + dst[i].m_nColor = b | (g << 8) | (r << 16) | nAlphaMask; + } + } + } + + static void R_StudioSoftwareProcessMeshSSE_DX7( const mstudio_meshvertexdata_t *vertData, matrix3x4_t *pPoseToWorld, + CCachedRenderData &vertexCache, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh, unsigned int nAlphaMask, + IMaterial* pMaterial) + { + Assert( numVertices > 0 ); + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + +#define N_VERTS_TO_DO_AT_ONCE 4 // for SSE processing + Assert(N_VERTS_TO_DO_AT_ONCE<=PREFETCH_VERT_COUNT); + + SSELightingHalfLambert=(pMaterial && (pMaterial->GetMaterialVarFlag( MATERIAL_VAR_HALFLAMBERT))); + Vector color; + Vector *pSrcPos; + Vector *pSrcNorm; + + ALIGN16 ModelVertexDX8_t dstVertexBuf[N_VERTS_TO_DO_AT_ONCE] ALIGN16_POST; + for(int i=0;i<N_VERTS_TO_DO_AT_ONCE;i++) + { + dstVertexBuf[i].m_flBoneWeights[0] = 1.0f; + dstVertexBuf[i].m_flBoneWeights[1] = 0.0f; + dstVertexBuf[i].m_nBoneIndices = 0; + dstVertexBuf[i].m_nColor = 0xFFFFFFFF; + dstVertexBuf[i].m_vecUserData.Init( 1.0f, 0.0f, 0.0f, 1.0f ); + } + + // do per-light precalcs. Better than doing them per vertex + for ( int l = 0; l < g_StudioRender.m_pRC->m_NumLocalLights; l++) + { + LightDesc_t *wl=g_StudioRender.m_pRC->m_LocalLights+l; + if (wl->m_Type==MATERIAL_LIGHT_SPOT) + { + float spread=wl->m_ThetaDot-wl->m_PhiDot; + if (spread>1.0e-10) + { + // note - this quantity is very sensitive to round off error. the sse + // reciprocal approximation won't cut it here. 
+ OneOver_ThetaDot_Minus_PhiDot[l]=ReplicateX4(1.0/spread); + } + else + { + // hard falloff instead of divide by zero + OneOver_ThetaDot_Minus_PhiDot[l]=ReplicateX4(1.0); + } + } + } + + ALIGN16 matrix3x4_t temp ALIGN16_POST; + ALIGN16 matrix3x4_t *pSkinMat ALIGN16_POST; + + // Mouth related stuff... + float fIllum = 1.0f; + fltx4 fIllumReplicated; + + Vector forward; + FourVectors mouth_forward; + if (nLighting == LIGHTING_MOUTH) + { + g_StudioRender.R_MouthComputeLightingValues( fIllum, forward ); + mouth_forward.DuplicateVector(forward); + } + fIllumReplicated=ReplicateX4(fIllum); + + if ((nLighting == LIGHTING_MOUTH) || (nLighting == LIGHTING_SOFTWARE)) + { + g_StudioRender.R_InitLightEffectsWorld3(); + } +#ifdef _DEBUG + // In debug, clear it out to ensure we aren't accidentially calling + // the last setup for R_ComputeLightForPoint3. + else + { + g_StudioRender.R_LightEffectsWorld3 = NULL; + } +#endif + + int n_iters=numVertices; + + ModelVertexDX8_t *dst=dstVertexBuf; + while(1) + { + for(int subc=0;subc<4;subc++) + { + int n=*(pGroupToMesh++); + + mstudiovertex_t &vert = pVertices[n]; + + // Compute the skinning matrix + pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, pPoseToWorld, temp ); + + // transform into world space + if (nDoFlex && vertexCache.IsVertexFlexed(n)) + { + CachedPosNormTan_t* pFlexedVertex = vertexCache.GetFlexVertex(n); + pSrcPos = &pFlexedVertex->m_Position; + pSrcNorm = &pFlexedVertex->m_Normal; + } + else + { + pSrcPos = &vert.m_vecPosition; + pSrcNorm = &vert.m_vecNormal; + + } + + // Transform the vert into world space + R_TransformVert( pSrcPos, pSrcNorm, 0, pSkinMat, + *(VectorAligned*)&dst->m_vecPosition, dst->m_vecNormal, *(Vector4DAligned*)&dst->m_vecUserData ); + + dst->m_vecTexCoord = vert.m_vecTexCoord; + dst++; + } + n_iters-=4; + dst=dstVertexBuf; + // Compute lighting + R_PerformVectorizedLightingSSE( mouth_forward, fIllumReplicated, dst, nAlphaMask); + if (n_iters<=0) // partial copy back? 
+ { + // copy 1..3 verts + while(n_iters!=-4) + { + meshBuilder.FastVertexSSE( *(ModelVertexDX7_t*)dst ); + n_iters--; + dst++; + } + break; + } + else + { + meshBuilder.Fast4VerticesSSE( + (ModelVertexDX7_t*)&(dst[0]), + (ModelVertexDX7_t*)&(dst[1]), + (ModelVertexDX7_t*)&(dst[2]), + (ModelVertexDX7_t*)&(dst[3])); + } + } + meshBuilder.FastAdvanceNVertices( numVertices ); + } +#endif // SPECIAL_SSE_MESH_PROCESSOR +}; + +//----------------------------------------------------------------------------- +// Draws the mesh as tristrips using software +//----------------------------------------------------------------------------- +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, false, false, LIGHTING_HARDWARE, false > ProcessMesh000H7_t; +typedef CProcessMeshWrapper< false, false, false, LIGHTING_SOFTWARE, false > ProcessMesh000S7_t; +typedef CProcessMeshWrapper< false, false, false, LIGHTING_MOUTH, false > ProcessMesh000M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, false, true, LIGHTING_HARDWARE, false > ProcessMesh001H7_t; +typedef CProcessMeshWrapper< false, false, true, LIGHTING_SOFTWARE, false > ProcessMesh001S7_t; +typedef CProcessMeshWrapper< false, false, true, LIGHTING_MOUTH, false > ProcessMesh001M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, true, false, LIGHTING_HARDWARE, false > ProcessMesh010H7_t; +typedef CProcessMeshWrapper< false, true, false, LIGHTING_SOFTWARE, false > ProcessMesh010S7_t; +typedef CProcessMeshWrapper< false, true, false, LIGHTING_MOUTH, false > ProcessMesh010M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, true, true, LIGHTING_HARDWARE, false > ProcessMesh011H7_t; +typedef CProcessMeshWrapper< false, true, true, LIGHTING_SOFTWARE, false > ProcessMesh011S7_t; +typedef CProcessMeshWrapper< false, true, true, LIGHTING_MOUTH, false > ProcessMesh011M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, false, false, 
LIGHTING_HARDWARE, false > ProcessMesh100H7_t; +typedef CProcessMeshWrapper< true, false, false, LIGHTING_SOFTWARE, false > ProcessMesh100S7_t; +typedef CProcessMeshWrapper< true, false, false, LIGHTING_MOUTH, false > ProcessMesh100M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, false, true, LIGHTING_HARDWARE, false > ProcessMesh101H7_t; +typedef CProcessMeshWrapper< true, false, true, LIGHTING_SOFTWARE, false > ProcessMesh101S7_t; +typedef CProcessMeshWrapper< true, false, true, LIGHTING_MOUTH, false > ProcessMesh101M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, true, false, LIGHTING_HARDWARE, false > ProcessMesh110H7_t; +typedef CProcessMeshWrapper< true, true, false, LIGHTING_SOFTWARE, false > ProcessMesh110S7_t; +typedef CProcessMeshWrapper< true, true, false, LIGHTING_MOUTH, false > ProcessMesh110M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, true, true, LIGHTING_HARDWARE, false > ProcessMesh111H7_t; +typedef CProcessMeshWrapper< true, true, true, LIGHTING_SOFTWARE, false > ProcessMesh111S7_t; +typedef CProcessMeshWrapper< true, true, true, LIGHTING_MOUTH, false > ProcessMesh111M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, false, false, LIGHTING_HARDWARE, true > ProcessMesh000H8_t; +typedef CProcessMeshWrapper< false, false, false, LIGHTING_SOFTWARE, true > ProcessMesh000S8_t; +typedef CProcessMeshWrapper< false, false, false, LIGHTING_MOUTH, true > ProcessMesh000M8_t; +#endif + +typedef CProcessMeshWrapper< false, false, true, LIGHTING_HARDWARE, true > ProcessMesh001H8_t; +typedef CProcessMeshWrapper< false, false, true, LIGHTING_SOFTWARE, true > ProcessMesh001S8_t; +typedef CProcessMeshWrapper< false, false, true, LIGHTING_MOUTH, true > ProcessMesh001M8_t; + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, true, false, LIGHTING_HARDWARE, true > ProcessMesh010H8_t; +typedef CProcessMeshWrapper< false, true, false, LIGHTING_SOFTWARE, true 
> ProcessMesh010S8_t; +typedef CProcessMeshWrapper< false, true, false, LIGHTING_MOUTH, true > ProcessMesh010M8_t; +#endif + +typedef CProcessMeshWrapper< false, true, true, LIGHTING_HARDWARE, true > ProcessMesh011H8_t; +typedef CProcessMeshWrapper< false, true, true, LIGHTING_SOFTWARE, true > ProcessMesh011S8_t; +typedef CProcessMeshWrapper< false, true, true, LIGHTING_MOUTH, true > ProcessMesh011M8_t; + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, false, false, LIGHTING_HARDWARE, true > ProcessMesh100H8_t; +typedef CProcessMeshWrapper< true, false, false, LIGHTING_SOFTWARE, true > ProcessMesh100S8_t; +typedef CProcessMeshWrapper< true, false, false, LIGHTING_MOUTH, true > ProcessMesh100M8_t; +#endif + +typedef CProcessMeshWrapper< true, false, true, LIGHTING_HARDWARE, true > ProcessMesh101H8_t; +typedef CProcessMeshWrapper< true, false, true, LIGHTING_SOFTWARE, true > ProcessMesh101S8_t; +typedef CProcessMeshWrapper< true, false, true, LIGHTING_MOUTH, true > ProcessMesh101M8_t; + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, true, false, LIGHTING_HARDWARE, true > ProcessMesh110H8_t; +typedef CProcessMeshWrapper< true, true, false, LIGHTING_SOFTWARE, true > ProcessMesh110S8_t; +typedef CProcessMeshWrapper< true, true, false, LIGHTING_MOUTH, true > ProcessMesh110M8_t; +#endif + +typedef CProcessMeshWrapper< true, true, true, LIGHTING_HARDWARE, true > ProcessMesh111H8_t; +typedef CProcessMeshWrapper< true, true, true, LIGHTING_SOFTWARE, true > ProcessMesh111S8_t; +typedef CProcessMeshWrapper< true, true, true, LIGHTING_MOUTH, true > ProcessMesh111M8_t; + +static SoftwareProcessMeshFunc_t g_SoftwareProcessMeshFunc[] = +{ +#if !defined( _X360 ) + ProcessMesh000H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh000S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh000M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh001H7_t::R_StudioSoftwareProcessMesh, +#ifdef SPECIAL_SSE_MESH_PROCESSOR + ProcessMesh001S7_t::R_StudioSoftwareProcessMeshSSE_DX7, + 
ProcessMesh001M7_t::R_StudioSoftwareProcessMeshSSE_DX7, +#else + ProcessMesh001S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh001M7_t::R_StudioSoftwareProcessMesh, +#endif + + ProcessMesh010H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh010S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh010M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh011H7_t::R_StudioSoftwareProcessMesh, +#ifdef SPECIAL_SSE_MESH_PROCESSOR + ProcessMesh011S7_t::R_StudioSoftwareProcessMeshSSE_DX7, + ProcessMesh011M7_t::R_StudioSoftwareProcessMeshSSE_DX7, +#else + ProcessMesh011S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh011M7_t::R_StudioSoftwareProcessMesh, +#endif + + ProcessMesh100H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh100S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh100M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh101H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh101S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh101M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh110H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh110S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh110M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh111H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh111S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh111M7_t::R_StudioSoftwareProcessMesh, +#endif + +#if !defined( _X360 ) + ProcessMesh000H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh000S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh000M8_t::R_StudioSoftwareProcessMesh, +#endif + ProcessMesh001H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh001S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh001M8_t::R_StudioSoftwareProcessMesh, +#if !defined( _X360 ) + ProcessMesh010H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh010S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh010M8_t::R_StudioSoftwareProcessMesh, +#endif + ProcessMesh011H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh011S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh011M8_t::R_StudioSoftwareProcessMesh, +#if !defined( _X360 ) + 
ProcessMesh100H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh100S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh100M8_t::R_StudioSoftwareProcessMesh, +#endif + ProcessMesh101H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh101S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh101M8_t::R_StudioSoftwareProcessMesh, +#if !defined( _X360 ) + ProcessMesh110H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh110S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh110M8_t::R_StudioSoftwareProcessMesh, +#endif + ProcessMesh111H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh111S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh111M8_t::R_StudioSoftwareProcessMesh, +}; + +inline const mstudio_meshvertexdata_t * GetFatVertexData( mstudiomesh_t * pMesh, studiohdr_t * pStudioHdr ) +{ + if ( !pMesh->pModel()->CacheVertexData( pStudioHdr ) ) + { + // not available yet + return NULL; + } + const mstudio_meshvertexdata_t *pVertData = pMesh->GetVertexData( pStudioHdr ); + Assert( pVertData ); + if ( !pVertData ) + { + static unsigned int warnCount = 0; + if ( warnCount++ < 20 ) + Warning( "ERROR: model verts have been compressed, cannot render! (use \"-no_compressed_vvds\")" ); + } + return pVertData; +} + +void CStudioRender::R_StudioSoftwareProcessMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh, StudioModelLighting_t lighting, bool doFlex, float r_blend, + bool bNeedsTangentSpace, bool bDX8Vertex, IMaterial *pMaterial ) +{ + unsigned int nAlphaMask = RoundFloatToInt( r_blend * 255.0f ); + nAlphaMask = clamp( nAlphaMask, 0, 255 ); + nAlphaMask <<= 24; + + // FIXME: Use function pointers to simplify this?!? 
+ int idx; + if ( IsPC() ) + { + idx = bDX8Vertex * 24 + bNeedsTangentSpace * 12 + doFlex * 6 + MathLib_SSEEnabled() * 3 + lighting; + } + else + { + idx = bNeedsTangentSpace * 6 + doFlex * 3 + lighting; + } + + const mstudio_meshvertexdata_t *pVertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( pVertData ) + { + // invoke the software mesh processing handler + g_SoftwareProcessMeshFunc[idx]( pVertData, m_PoseToWorld, m_VertexCache, meshBuilder, numVertices, pGroupToMesh, nAlphaMask, pMaterial ); + } +} + +static void R_SlowTransformVert( const Vector *pSrcPos, const Vector *pSrcNorm, + matrix3x4_t *pSkinMat, VectorAligned &pos, VectorAligned &norm ) +{ + pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3]; + norm.x = pSrcNorm->x * (*pSkinMat)[0][0] + pSrcNorm->y * (*pSkinMat)[0][1] + pSrcNorm->z * (*pSkinMat)[0][2]; + + pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3]; + norm.y = pSrcNorm->x * (*pSkinMat)[1][0] + pSrcNorm->y * (*pSkinMat)[1][1] + pSrcNorm->z * (*pSkinMat)[1][2]; + + pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3]; + norm.z = pSrcNorm->x * (*pSkinMat)[2][0] + pSrcNorm->y * (*pSkinMat)[2][1] + pSrcNorm->z * (*pSkinMat)[2][2]; +} + +static void R_SlowTransformVert( const Vector *pSrcPos, const Vector *pSrcNorm, const Vector4D *pSrcTangentS, + matrix3x4_t *pSkinMat, VectorAligned &pos, VectorAligned &norm, VectorAligned &tangentS ) +{ + pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3]; + norm.x = pSrcNorm->x * (*pSkinMat)[0][0] + pSrcNorm->y * (*pSkinMat)[0][1] + pSrcNorm->z * (*pSkinMat)[0][2]; + tangentS.x = pSrcTangentS->x * (*pSkinMat)[0][0] + pSrcTangentS->y * (*pSkinMat)[0][1] + pSrcTangentS->z * (*pSkinMat)[0][2]; + + pos.y = pSrcPos->x * 
(*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3]; + norm.y = pSrcNorm->x * (*pSkinMat)[1][0] + pSrcNorm->y * (*pSkinMat)[1][1] + pSrcNorm->z * (*pSkinMat)[1][2]; + tangentS.y = pSrcTangentS->x * (*pSkinMat)[1][0] + pSrcTangentS->y * (*pSkinMat)[1][1] + pSrcTangentS->z * (*pSkinMat)[1][2]; + + pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3]; + norm.z = pSrcNorm->x * (*pSkinMat)[2][0] + pSrcNorm->y * (*pSkinMat)[2][1] + pSrcNorm->z * (*pSkinMat)[2][2]; + tangentS.z = pSrcTangentS->x * (*pSkinMat)[2][0] + pSrcTangentS->y * (*pSkinMat)[2][1] + pSrcTangentS->z * (*pSkinMat)[2][2]; +} + +void CStudioRender::R_StudioSoftwareProcessMesh_Normals( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh, StudioModelLighting_t lighting, bool doFlex, float r_blend, + bool bShowNormals, bool bShowTangentFrame ) +{ + ALIGN16 matrix3x4_t temp ALIGN16_POST; + ALIGN16 matrix3x4_t *pSkinMat ALIGN16_POST; + + Vector *pSrcPos = NULL; + Vector *pSrcNorm = NULL; + Vector4D *pSrcTangentS = NULL; + VectorAligned norm, pos, tangentS, tangentT; + + // Gets at the vertex data + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( !vertData ) + { + // not available + return; + } + + if ( bShowTangentFrame && !vertData->HasTangentData() ) + return; + + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + Vector4D *pTangentS = NULL; + Vector4D tang; + if ( bShowTangentFrame ) + { + pTangentS = vertData->TangentS( 0 ); + } + + for ( int j=0; j < numVertices; j++ ) + { + int n = pGroupToMesh[j]; + + mstudiovertex_t &vert = pVertices[n]; + if ( bShowTangentFrame ) + { + tang = pTangentS[n]; + } + + pSkinMat = ComputeSkinMatrix( vert.m_BoneWeights, m_PoseToWorld, temp ); + + // transform into world space + if ( m_VertexCache.IsVertexFlexed(n) ) + { + CachedPosNormTan_t* pFlexedVertex = 
m_VertexCache.GetFlexVertex(n); + pSrcPos = &pFlexedVertex->m_Position; + pSrcNorm = &pFlexedVertex->m_Normal; + + if ( bShowTangentFrame ) + { + pSrcTangentS = &pFlexedVertex->m_TangentS; + } + } + else + { + pSrcPos = &vert.m_vecPosition; + pSrcNorm = &vert.m_vecNormal; + if ( bShowTangentFrame ) + { + pSrcTangentS = &tang; + } + } + + // Transform the vert into world space + if ( bShowTangentFrame && ( pSrcTangentS != NULL ) ) + { + R_SlowTransformVert( pSrcPos, pSrcNorm, pSrcTangentS, pSkinMat, pos, norm, tangentS ); + } + else + { + R_SlowTransformVert( pSrcPos, pSrcNorm, pSkinMat, pos, norm ); + } + + if ( bShowNormals ) + { + meshBuilder.Position3fv( pos.Base() ); + meshBuilder.Color3f( 0.0f, 0.0f, 1.0f ); + meshBuilder.AdvanceVertex(); + + Vector normalPos; + normalPos = pos + norm * 0.5f; + meshBuilder.Position3fv( normalPos.Base() ); + meshBuilder.Color3f( 0.0f, 0.0f, 1.0f ); + meshBuilder.AdvanceVertex(); + } + + if ( bShowTangentFrame && ( pSrcTangentS != NULL) ) + { + // TangentS + meshBuilder.Position3fv( pos.Base() ); + meshBuilder.Color3f( 1.0f, 0.0f, 0.0f ); + meshBuilder.AdvanceVertex(); + + Vector vTangentSPos; + vTangentSPos = pos + tangentS * 0.5f; + meshBuilder.Position3fv( vTangentSPos.Base() ); + meshBuilder.Color3f( 1.0f, 0.0f, 0.0f ); + meshBuilder.AdvanceVertex(); + + // TangentT + meshBuilder.Position3fv( pos.Base() ); + meshBuilder.Color3f( 0.0f, 1.0f, 0.0f ); + meshBuilder.AdvanceVertex(); + + // Compute tangentT from normal and tangentS + CrossProduct( norm, tangentS, tangentT ); + + Vector vTangentTPos; + vTangentTPos = pos + tangentT * 0.5f; + meshBuilder.Position3fv( vTangentTPos.Base() ); + meshBuilder.Color3f( 0.0f, 1.0f, 0.0f ); + meshBuilder.AdvanceVertex(); + + } // end tacking on tangentS and tangetT line segments + } +} + +#pragma warning (default:4701) + + + +template +void CCachedRenderData::ComputeFlexedVertex_StreamOffset<mstudiovertanim_t>( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, + mstudiovertanim_t *pvanim, int 
vertCount, float w1, float w2, float w3, float w4 ); + + + +void CStudioRender::R_StudioProcessFlexedMesh_StreamOffset( mstudiomesh_t* pmesh, int lod ) +{ + VPROF_BUDGET( "ProcessFlexedMesh_SO", _T("HW Morphing") ); + + if ( m_VertexCache.IsFlexComputationDone() ) + return; + + int vertCount = pmesh->vertexdata.numLODVertexes[lod]; + m_VertexCache.SetupComputation( pmesh, true ); + mstudioflex_t *pflex = pmesh->pFlex( 0 ); + + for (int i = 0; i < pmesh->numflexes; i++) + { + float w1 = RampFlexWeight( pflex[i], m_pFlexWeights[ pflex[i].flexdesc ] ); + float w2 = RampFlexWeight( pflex[i], m_pFlexDelayedWeights[ pflex[i].flexdesc ] ); + + float w3, w4; + if ( pflex[i].flexpair != 0) + { + w3 = RampFlexWeight( pflex[i], m_pFlexWeights[ pflex[i].flexpair ] ); + w4 = RampFlexWeight( pflex[i], m_pFlexDelayedWeights[ pflex[i].flexpair ] ); + } + else + { + w3 = w1; + w4 = w2; + } + + // Move on if the weights for this flex are sufficiently small + if (w1 > -0.001 && w1 < 0.001 && w2 > -0.001 && w2 < 0.001) + { + if (w3 > -0.001 && w3 < 0.001 && w4 > -0.001 && w4 < 0.001) + { + continue; + } + } + +#ifdef PLATFORM_WINDOWS + if ( pflex[i].vertanimtype == STUDIO_VERT_ANIM_NORMAL ) + { + mstudiovertanim_t *pvanim = pflex[i].pVertanim( 0 ); + m_VertexCache.ComputeFlexedVertex_StreamOffset_Optimized( m_pStudioHdr, &pflex[i], pvanim, vertCount, w1, w2, w3, w4 ); + } + else + { + mstudiovertanim_wrinkle_t *pvanim = pflex[i].pVertanimWrinkle( 0 ); + m_VertexCache.ComputeFlexedVertexWrinkle_StreamOffset_Optimized( m_pStudioHdr, &pflex[i], pvanim, vertCount, w1, w2, w3, w4 ); + } +#else // PLATFORM_WINDOWS + if ( pflex[i].vertanimtype == STUDIO_VERT_ANIM_NORMAL ) + { + mstudiovertanim_t *pvanim = pflex[i].pVertanim( 0 ); + m_VertexCache.ComputeFlexedVertex_StreamOffset( m_pStudioHdr, &pflex[i], pvanim, vertCount, w1, w2, w3, w4 ); + } + else + { + mstudiovertanim_wrinkle_t *pvanim = pflex[i].pVertanimWrinkle( 0 ); + m_VertexCache.ComputeFlexedVertex_StreamOffset( m_pStudioHdr, 
&pflex[i], pvanim, vertCount, w1, w2, w3, w4 ); + } +#endif // PLATFORM_WINDOWS + } +} + + +//----------------------------------------------------------------------------- +// Purpose: +// +// ** Only execute this function if device supports stream offset ** +// +// Input : pGroup - pointer to a studio mesh group +// Output : none +//----------------------------------------------------------------------------- +void CStudioRender::R_StudioFlexMeshGroup( studiomeshgroup_t *pGroup ) +{ + VPROF_BUDGET( "R_StudioFlexMeshGroup", VPROF_BUDGETGROUP_MODEL_RENDERING ); + + CMeshBuilder meshBuilder; + int nVertexOffsetInBytes = 0; + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + IMesh *pMesh = pRenderContext->GetFlexMesh(); + meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0, &nVertexOffsetInBytes ); + + // Just pos and norm deltas (tangents use same deltas as normals) + for ( int j=0; j < pGroup->m_NumVertices; j++) + { + int n = pGroup->m_pGroupIndexToMeshIndex[j]; + if ( m_VertexCache.IsThinVertexFlexed(n) ) + { + CachedPosNorm_t *pIn = m_VertexCache.GetThinFlexVertex(n); + meshBuilder.Position3fv( pIn->m_Position.Base() ); + meshBuilder.NormalDelta3fv( pIn->m_Normal.Base() ); + meshBuilder.Wrinkle1f( pIn->m_Position.w ); + } + else + { + meshBuilder.Position3f( 0.0f, 0.0f, 0.0f ); + meshBuilder.NormalDelta3f( 0.0f, 0.0f, 0.0f ); + meshBuilder.Wrinkle1f( 0.0f ); + } + meshBuilder.AdvanceVertex(); + } + + meshBuilder.End( false, false ); + + pGroup->m_pMesh->SetFlexMesh( pMesh, nVertexOffsetInBytes ); +} + +//----------------------------------------------------------------------------- +// Processes a flexed mesh to be hw skinned +//----------------------------------------------------------------------------- +void CStudioRender::R_StudioProcessFlexedMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh ) +{ + PROFILE_STUDIO("FlexMeshBuilder"); + + Vector4D *pStudioTangentS; + + // get 
the vertex data + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( !vertData ) + { + // not available + return; + } + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + if (vertData->HasTangentData()) + { + pStudioTangentS = vertData->TangentS( 0 ); + Assert( pStudioTangentS->w == -1.0f || pStudioTangentS->w == 1.0f ); + + for ( int j=0; j < numVertices ; j++) + { + int n = pGroupToMesh[j]; + mstudiovertex_t &vert = pVertices[n]; + + // FIXME: For now, flexed hw-skinned meshes can only have one bone + // The data must exist in the 0th hardware matrix + + // Here, we are doing HW skinning, so we need to simply copy over the flex + if ( m_VertexCache.IsVertexFlexed(n) ) + { + CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex(n); + meshBuilder.Position3fv( pFlexedVertex->m_Position.Base() ); + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.Normal3fv( pFlexedVertex->m_Normal.Base() ); + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + Assert( pFlexedVertex->m_TangentS.w == -1.0f || pFlexedVertex->m_TangentS.w == 1.0f ); + meshBuilder.UserData( pFlexedVertex->m_TangentS.Base() ); + } + else + { + meshBuilder.Position3fv( vert.m_vecPosition.Base() ); + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.Normal3fv( vert.m_vecNormal.Base() ); + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + Assert( pStudioTangentS[n].w == -1.0f || pStudioTangentS[n].w == 1.0f ); + meshBuilder.UserData( 
pStudioTangentS[n].Base() ); + } + + meshBuilder.AdvanceVertex(); + } + } + else + { + // no TangentS, replicated code to save inner conditional + for ( int j=0; j < numVertices ; j++) + { + int n = pGroupToMesh[j]; + mstudiovertex_t &vert = pVertices[n]; + + // FIXME: For now, flexed hw-skinned meshes can only have one bone + // The data must exist in the 0th hardware matrix + + // Here, we are doing HW skinning, so we need to simply copy over the flex + if ( m_VertexCache.IsVertexFlexed(n) ) + { + CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex(n); + meshBuilder.Position3fv( pFlexedVertex->m_Position.Base() ); + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.Normal3fv( pFlexedVertex->m_Normal.Base() ); + } + else + { + meshBuilder.Position3fv( vert.m_vecPosition.Base() ); + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.Normal3fv( vert.m_vecNormal.Base() ); + } + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + meshBuilder.AdvanceVertex(); + } + } +} + +//----------------------------------------------------------------------------- +// Restores the static mesh +//----------------------------------------------------------------------------- +template<VertexCompressionType_t T> void CStudioRender::R_StudioRestoreMesh( mstudiomesh_t* pmesh, studiomeshgroup_t* pMeshData ) +{ + Vector4D *pStudioTangentS; + + if ( IsX360() ) + return; + + // get at the vertex data + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( 
!vertData ) + { + // not available + return; + } + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + if (vertData->HasTangentData()) + { + pStudioTangentS = vertData->TangentS( 0 ); + } + else + { + pStudioTangentS = NULL; + } + + CMeshBuilder meshBuilder; + + meshBuilder.BeginModify( pMeshData->m_pMesh ); + meshBuilder.SetCompressionType( T ); + for ( int j=0; j < meshBuilder.VertexCount() ; j++) + { + meshBuilder.SelectVertex(j); + int n = pMeshData->m_pGroupIndexToMeshIndex[j]; + mstudiovertex_t &vert = pVertices[n]; + + meshBuilder.Position3fv( vert.m_vecPosition.Base() ); + meshBuilder.CompressedNormal3fv<T>( vert.m_vecNormal.Base() ); + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + + if (pStudioTangentS) + { + Assert( pStudioTangentS[n].w == -1.0f || pStudioTangentS[n].w == 1.0f ); + meshBuilder.CompressedUserData<T>( pStudioTangentS[n].Base() ); + } + + meshBuilder.Color4ub( 255, 255, 255, 255 ); + } + meshBuilder.EndModify(); +} + +//----------------------------------------------------------------------------- +// Draws a mesh using hardware + software skinning +//----------------------------------------------------------------------------- +int CStudioRender::R_StudioDrawGroupHWSkin( IMatRenderContext *pRenderContext, studiomeshgroup_t* pGroup, IMesh* pMesh, ColorMeshInfo_t * pColorMeshInfo ) +{ + PROFILE_STUDIO("HwSkin"); + int numTrianglesRendered = 0; + +#if PIX_ENABLE + char szPIXEventName[128]; + sprintf( szPIXEventName, "R_StudioDrawGroupHWSkin (%s)", m_pStudioHdr->name ); // PIX + PIXEVENT( pRenderContext, szPIXEventName ); +#endif + + if ( m_pStudioHdr->numbones == 1 ) + { + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadMatrix( m_PoseToWorld[0] ); + + // a single bone means all verts rigidly assigned + // any bonestatechange would needlessly re-load the same matrix + // xbox can skip further hw skinning, seems ok for pc too + pRenderContext->SetNumBoneWeights( 0 ); + } + + if ( pColorMeshInfo ) + 
pMesh->SetColorMesh( pColorMeshInfo->m_pMesh, pColorMeshInfo->m_nVertOffsetInBytes ); + else + pMesh->SetColorMesh( NULL, 0 ); + + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t* pStrip = &pGroup->m_pStripData[j]; + + if ( m_pStudioHdr->numbones > 1 ) + { + // Reset bone state if we're hardware skinning + pRenderContext->SetNumBoneWeights( pStrip->numBones ); + + for (int k = 0; k < pStrip->numBoneStateChanges; ++k) + { + OptimizedModel::BoneStateChangeHeader_t* pStateChange = pStrip->pBoneStateChange(k); + if ( pStateChange->newBoneID < 0 ) + break; + + pRenderContext->LoadBoneMatrix( pStateChange->hardwareID, m_PoseToWorld[pStateChange->newBoneID] ); + } + } + + pMesh->SetPrimitiveType( pStrip->flags & OptimizedModel::STRIP_IS_TRISTRIP ? + MATERIAL_TRIANGLE_STRIP : MATERIAL_TRIANGLES ); + + pMesh->Draw( pStrip->indexOffset, pStrip->numIndices ); + numTrianglesRendered += pGroup->m_pUniqueTris[j]; + } + pMesh->SetColorMesh( NULL, 0 ); + + return numTrianglesRendered; +} + +int CStudioRender::R_StudioDrawGroupSWSkin( studiomeshgroup_t* pGroup, IMesh* pMesh ) +{ + int numTrianglesRendered = 0; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + // Disable skinning + pRenderContext->SetNumBoneWeights( 0 ); + + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t* pStrip = &pGroup->m_pStripData[j]; + + // Choose our primitive type + pMesh->SetPrimitiveType( pStrip->flags & OptimizedModel::STRIP_IS_TRISTRIP ? 
+ MATERIAL_TRIANGLE_STRIP : MATERIAL_TRIANGLES ); + + pMesh->Draw( pStrip->indexOffset, pStrip->numIndices ); + numTrianglesRendered += pGroup->m_pUniqueTris[j]; + } + + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Sets up the hw flex mesh +//----------------------------------------------------------------------------- +void CStudioRender::ComputeFlexWeights( int nFlexCount, mstudioflex_t *pFlex, MorphWeight_t *pWeights ) +{ + for ( int i = 0; i < nFlexCount; ++i, ++pFlex ) + { + MorphWeight_t &weight = pWeights[i]; + + weight.m_pWeight[MORPH_WEIGHT] = RampFlexWeight( *pFlex, m_pFlexWeights[ pFlex->flexdesc ] ); + weight.m_pWeight[MORPH_WEIGHT_LAGGED] = RampFlexWeight( *pFlex, m_pFlexDelayedWeights[ pFlex->flexdesc ] ); + + if ( pFlex->flexpair != 0 ) + { + weight.m_pWeight[MORPH_WEIGHT_STEREO] = RampFlexWeight( *pFlex, m_pFlexWeights[ pFlex->flexpair ] ); + weight.m_pWeight[MORPH_WEIGHT_STEREO_LAGGED] = RampFlexWeight( *pFlex, m_pFlexDelayedWeights[ pFlex->flexpair ] ); + } + else + { + weight.m_pWeight[MORPH_WEIGHT_STEREO] = weight.m_pWeight[MORPH_WEIGHT]; + weight.m_pWeight[MORPH_WEIGHT_STEREO_LAGGED] = weight.m_pWeight[MORPH_WEIGHT_LAGGED]; + } + } +} + + +//----------------------------------------------------------------------------- +// Computes a vertex format to use +//----------------------------------------------------------------------------- +inline VertexFormat_t CStudioRender::ComputeSWSkinVertexFormat( IMaterial *pMaterial ) const +{ + bool bDX8OrHigherVertex = IsX360() || ( UserDataSize( pMaterial->GetVertexFormat() ) != 0 ); + VertexFormat_t fmt = VERTEX_POSITION | VERTEX_NORMAL | VERTEX_COLOR | VERTEX_BONE_INDEX | + VERTEX_BONEWEIGHT( 2 ) | VERTEX_TEXCOORD_SIZE( 0, 2 ); + if ( bDX8OrHigherVertex ) + { + fmt |= VERTEX_USERDATA_SIZE( 4 ); + } + return fmt; +} + + +//----------------------------------------------------------------------------- +// Draws the mesh as tristrips 
using hardware +//----------------------------------------------------------------------------- +int CStudioRender::R_StudioDrawStaticMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, + studiomeshgroup_t* pGroup, StudioModelLighting_t lighting, + float r_blend, IMaterial* pMaterial, int lod, ColorMeshInfo_t *pColorMeshes ) +{ + MatSysQueueMark( g_pMaterialSystem, "R_StudioDrawStaticMesh\n" ); + VPROF( "R_StudioDrawStaticMesh" ); + + int numTrianglesRendered = 0; + + bool bDoSoftwareLighting = !pColorMeshes && + ((m_pRC->m_Config.bSoftwareSkin != 0) || m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame || + (pMaterial ? pMaterial->NeedsSoftwareSkinning() : false) || + (m_pRC->m_Config.bSoftwareLighting != 0) || + ((lighting != LIGHTING_HARDWARE) && (lighting != LIGHTING_MOUTH) )); + + // software lighting case + if ( bDoSoftwareLighting || m_pRC->m_Config.m_bStatsMode == true ) + { + if ( m_pRC->m_Config.bNoSoftware ) + return 0; + + bool bNeedsTangentSpace = pMaterial ? 
pMaterial->NeedsTangentSpace() : false; + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadIdentity(); + + // Hardcode the vertex format to a well-known format to make sw skin code faster + VertexFormat_t fmt = ComputeSWSkinVertexFormat( pMaterial ); + bool bDX8Vertex = ( UserDataSize( fmt ) != 0 ); + + if ( m_pRC->m_Config.m_bStatsMode == false ) + { + Assert( ( pGroup->m_Flags & ( MESHGROUP_IS_FLEXED | MESHGROUP_IS_DELTA_FLEXED ) ) == 0 ); + } + + CMeshBuilder meshBuilder; + IMesh* pMesh = pRenderContext->GetDynamicMeshEx( fmt, false, 0, pGroup->m_pMesh ); + meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0 ); + + R_StudioSoftwareProcessMesh( pmesh, meshBuilder, + pGroup->m_NumVertices, pGroup->m_pGroupIndexToMeshIndex, + lighting, false, r_blend, bNeedsTangentSpace, bDX8Vertex, pMaterial); + + if ( m_pRC->m_Config.m_bStatsMode == true ) + { + R_GatherStats( pGroup, meshBuilder, pMesh, pMaterial ); + } + else + { + meshBuilder.End(); + + numTrianglesRendered = R_StudioDrawGroupSWSkin( pGroup, pMesh ); + } + + MatSysQueueMark( g_pMaterialSystem, "END R_StudioDrawStaticMesh\n" ); + return numTrianglesRendered; + } + + // Needed when we switch back and forth between hardware + software lighting + if ( IsPC() && pGroup->m_MeshNeedsRestore ) + { + VertexCompressionType_t compressionType = CompressionType( pGroup->m_pMesh->GetVertexFormat() ); + // Restore once, using the instantiation that matches the mesh's compression type. + // Each case must break: falling through from the compressed case would restore the + // same mesh a second time through the VERTEX_COMPRESSION_NONE instantiation. + switch ( compressionType ) + { + case VERTEX_COMPRESSION_ON: + R_StudioRestoreMesh<VERTEX_COMPRESSION_ON>( pmesh, pGroup ); + break; + case VERTEX_COMPRESSION_NONE: + default: + R_StudioRestoreMesh<VERTEX_COMPRESSION_NONE>( pmesh, pGroup ); + break; + } + pGroup->m_MeshNeedsRestore = false; + } + + // Build separate flex stream containing deltas, which will get copied into another vertex stream + bool bUseHWFlex = m_pRC->m_Config.m_bEnableHWMorph && pGroup->m_pMorph && !m_bDrawTranslucentSubModels; + bool bUseSOFlex = g_pMaterialSystemHardwareConfig->SupportsStreamOffset() && !bUseHWFlex; + if 
( (pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED) && m_pRC->m_Config.bFlex ) + { + PIXEVENT( pRenderContext, "Delta Flex Processing" ); + if ( bUseHWFlex ) + { + pRenderContext->BindMorph( pGroup->m_pMorph ); + } + if ( bUseSOFlex ) + { + R_StudioProcessFlexedMesh_StreamOffset( pmesh, lod ); + R_StudioFlexMeshGroup( pGroup ); + } + } + + // Draw it baby + if ( pColorMeshes && ( pGroup->m_ColorMeshID != -1 ) ) + { + // draw using specified color mesh + numTrianglesRendered = R_StudioDrawGroupHWSkin( pRenderContext, pGroup, pGroup->m_pMesh, &(pColorMeshes[pGroup->m_ColorMeshID]) ); + } + else + { + numTrianglesRendered = R_StudioDrawGroupHWSkin( pRenderContext, pGroup, pGroup->m_pMesh, NULL ); + } + + if ( ( pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED ) && m_pRC->m_Config.bFlex ) + { + if ( bUseHWFlex ) + { + pRenderContext->BindMorph( NULL ); + } + if ( bUseSOFlex ) + { + pGroup->m_pMesh->DisableFlexMesh(); // clear flex stream + } + } + + MatSysQueueMark( g_pMaterialSystem, "END2 R_StudioDrawStaticMesh\n" ); + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Draws a dynamic mesh +//----------------------------------------------------------------------------- +int CStudioRender::R_StudioDrawDynamicMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, + studiomeshgroup_t* pGroup, StudioModelLighting_t lighting, + float r_blend, IMaterial* pMaterial, int lod ) +{ + VPROF( "R_StudioDrawDynamicMesh" ); + + bool doFlex = ((pGroup->m_Flags & MESHGROUP_IS_FLEXED) != 0) && m_pRC->m_Config.bFlex; + + bool doSoftwareLighting = (m_pRC->m_Config.bSoftwareLighting != 0) || + ((lighting != LIGHTING_HARDWARE) && (lighting != LIGHTING_MOUTH) ); + + bool swSkin = doSoftwareLighting || m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame || + ((pGroup->m_Flags & MESHGROUP_IS_HWSKINNED) == 0) || + m_pRC->m_Config.bSoftwareSkin || + ( pMaterial ? 
pMaterial->NeedsSoftwareSkinning() : false ); + + if ( !doFlex && !swSkin ) + { + return R_StudioDrawStaticMesh( pRenderContext, pmesh, pGroup, lighting, r_blend, pMaterial, lod, NULL ); + } + + // drawers before this might not need the vertexes, so don't pay the penalty of getting them + // everybody else past this point (flex or swskinning) expects to read vertexes + // get vertex data + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( !vertData ) + { + // not available + return 0; + } + + MatSysQueueMark( g_pMaterialSystem, "R_StudioDrawDynamicMesh\n" ); + + int numTrianglesRendered = 0; + +#ifdef _DEBUG + const char *pDebugMaterialName = NULL; + if ( pMaterial ) + { + pDebugMaterialName = pMaterial->GetName(); + } +#endif + + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadIdentity(); + + // Software flex verts (not a delta stream) + if ( doFlex ) + { + R_StudioFlexVerts( pmesh, lod ); + } + + IMesh* pMesh; + bool bNeedsTangentSpace = pMaterial ? 
pMaterial->NeedsTangentSpace() : false; + + VertexFormat_t fmt = ComputeSWSkinVertexFormat( pMaterial ); + bool bDX8Vertex = ( UserDataSize( fmt ) != 0 ); + + CMeshBuilder meshBuilder; + pMesh = pRenderContext->GetDynamicMeshEx( fmt, false, 0, pGroup->m_pMesh); + meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0 ); + + if ( swSkin ) + { + R_StudioSoftwareProcessMesh( pmesh, meshBuilder, pGroup->m_NumVertices, + pGroup->m_pGroupIndexToMeshIndex, lighting, doFlex, r_blend, + bNeedsTangentSpace, bDX8Vertex, pMaterial ); + } + else if ( doFlex ) + { + R_StudioProcessFlexedMesh( pmesh, meshBuilder, pGroup->m_NumVertices, + pGroup->m_pGroupIndexToMeshIndex ); + } + + meshBuilder.End(); + + // Draw it baby + if ( !swSkin ) + { + numTrianglesRendered = R_StudioDrawGroupHWSkin( pRenderContext, pGroup, pMesh ); + } + else + { + numTrianglesRendered = R_StudioDrawGroupSWSkin( pGroup, pMesh ); + } + + if ( m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame ) + { + pRenderContext->SetNumBoneWeights( 0 ); + pRenderContext->Bind( m_pMaterialTangentFrame ); + + CMeshBuilder meshBuilder; + pMesh = pRenderContext->GetDynamicMesh( false ); + meshBuilder.Begin( pMesh, MATERIAL_LINES, pGroup->m_NumVertices ); + + R_StudioSoftwareProcessMesh_Normals( pmesh, meshBuilder, pGroup->m_NumVertices, + pGroup->m_pGroupIndexToMeshIndex, lighting, doFlex, r_blend, m_pRC->m_Config.bDrawNormals, m_pRC->m_Config.bDrawTangentFrame ); + meshBuilder.End( ); + + pMesh->Draw(); + pRenderContext->Bind( pMaterial ); + } + + MatSysQueueMark( g_pMaterialSystem, "END R_StudioDrawDynamicMesh\n" ); + + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Sets the material vars for the eye vertex shader +//----------------------------------------------------------------------------- +static unsigned int eyeOriginCache = 0; +static unsigned int eyeUpCache = 0; +static unsigned int irisUCache = 0; +static 
unsigned int irisVCache = 0; +static unsigned int glintUCache = 0; +static unsigned int glintVCache = 0; +void CStudioRender::SetEyeMaterialVars( IMaterial* pMaterial, mstudioeyeball_t* peyeball, + Vector const& eyeOrigin, const matrix3x4_t& irisTransform, const matrix3x4_t& glintTransform ) +{ + if ( !pMaterial ) + return; + + IMaterialVar* pVar = pMaterial->FindVarFast( "$eyeorigin", &eyeOriginCache ); + if (pVar) + { + pVar->SetVecValue( eyeOrigin.Base(), 3 ); + } + + pVar = pMaterial->FindVarFast( "$eyeup", &eyeUpCache ); + if (pVar) + { + pVar->SetVecValue( peyeball->up.Base(), 3 ); + } + pVar = pMaterial->FindVarFast( "$irisu", &irisUCache ); + if (pVar) + { + pVar->SetVecValue( irisTransform[0], 4 ); + } + + pVar = pMaterial->FindVarFast( "$irisv", &irisVCache ); + if (pVar) + { + pVar->SetVecValue( irisTransform[1], 4 ); + } + + pVar = pMaterial->FindVarFast( "$glintu", &glintUCache ); + if (pVar) + { + pVar->SetVecValue( glintTransform[0], 4 ); + } + + pVar = pMaterial->FindVarFast( "$glintv", &glintVCache ); + if (pVar) + { + pVar->SetVecValue( glintTransform[1], 4 ); + } +} + + +//----------------------------------------------------------------------------- +// Specialized routine to draw the eyeball +//----------------------------------------------------------------------------- +static unsigned int glintCache = 0; +int CStudioRender::R_StudioDrawEyeball( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, studiomeshdata_t* pMeshData, + StudioModelLighting_t lighting, IMaterial *pMaterial, int lod ) +{ + if ( !m_pRC->m_Config.bEyes ) + { + return 0; + } + + // FIXME: We could compile a static vertex buffer in this case + // if there's no flexed verts. 
+ const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( !vertData ) + { + // not available + return 0; + } + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + int j; + int numTrianglesRendered = 0; + + // See if any meshes in the group want to go down the static path... + bool bIsDeltaFlexed = false; + bool bIsHardwareSkinnedData = false; + bool bIsFlexed = false; + for (j = 0; j < pMeshData->m_NumGroup; ++j) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + + if ( ( pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED ) && g_pMaterialSystemHardwareConfig->SupportsStreamOffset() ) + bIsDeltaFlexed = true; + + if ( pGroup->m_Flags & MESHGROUP_IS_FLEXED ) + bIsFlexed = true; + + if ( pGroup->m_Flags & MESHGROUP_IS_HWSKINNED ) + bIsHardwareSkinnedData = true; + } + + // Take the static path for new flexed models on DX9 hardware + bool bFlexStatic = bIsDeltaFlexed && g_pMaterialSystemHardwareConfig->SupportsStreamOffset(); + bool bShouldHardwareSkin = bIsHardwareSkinnedData && ( !bIsFlexed || bFlexStatic ) && + ( lighting != LIGHTING_SOFTWARE ) && ( !m_pRC->m_Config.bSoftwareSkin ); + + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadIdentity(); + + // Software flex eyeball verts (not a delta stream) + if ( bIsFlexed && ( !bFlexStatic || !bShouldHardwareSkin ) ) + { + R_StudioFlexVerts( pmesh, lod ); + } + + mstudioeyeball_t *peyeball = m_pSubModel->pEyeball(pmesh->materialparam); + + // We'll need this to compute normals + Vector org; + VectorTransform( peyeball->org, m_pBoneToWorld[peyeball->bone], org ); + + // Compute the glint projection + matrix3x4_t glintMat; + ComputeGlintTextureProjection( &m_pEyeballState[pmesh->materialparam], m_pRC->m_ViewRight, m_pRC->m_ViewUp, glintMat ); + + if ( !m_pRC->m_Config.bWireframe ) + { + // Compute the glint procedural texture + IMaterialVar* pGlintVar = pMaterial->FindVarFast( "$glint", &glintCache ); + if (pGlintVar) + { + R_StudioEyeballGlint( 
&m_pEyeballState[pmesh->materialparam], pGlintVar, m_pRC->m_ViewRight, m_pRC->m_ViewUp, m_pRC->m_ViewOrigin ); + } + SetEyeMaterialVars( pMaterial, peyeball, org, m_pEyeballState[pmesh->materialparam].mat, glintMat ); + } + + if ( bShouldHardwareSkin ) + { + for ( j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + numTrianglesRendered += R_StudioDrawStaticMesh( pRenderContext, pmesh, pGroup, lighting, m_pRC->m_AlphaMod, pMaterial, lod, NULL ); + } + + return numTrianglesRendered; + } + + pRenderContext->SetNumBoneWeights( 0 ); + m_VertexCache.SetupComputation( pmesh ); + + int nAlpnaInt = RoundFloatToInt( m_pRC->m_AlphaMod * 255 ); + unsigned char a = clamp( nAlpnaInt, 0, 255 ); + + Vector position, normal, color; + + // setup the call + R_InitLightEffectsWorld3(); + + // Render the puppy + CMeshBuilder meshBuilder; + + bool useHWLighting = m_pRC->m_Config.m_bSupportsVertexAndPixelShaders && !m_pRC->m_Config.bSoftwareLighting; + // Draw all the various mesh groups... + for ( j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + + IMesh* pMesh = pRenderContext->GetDynamicMesh(false, 0, pGroup->m_pMesh); + + // garymcthack! need to look at the strip flags to figure out what it is. 
+ meshBuilder.Begin( pMesh, MATERIAL_TRIANGLES, pmesh->numvertices, 0 ); +// meshBuilder.Begin( pMesh, MATERIAL_TRIANGLE_STRIP, pmesh->numvertices, 0 ); + //VPROF_INCREMENT_COUNTER( "TransformFlexVerts", pGroup->m_NumVertices ); + + for ( int i=0; i < pGroup->m_NumVertices; ++i) + { + int n = pGroup->m_pGroupIndexToMeshIndex[i]; + mstudiovertex_t &vert = pVertices[n]; + + CachedPosNorm_t* pWorldVert = m_VertexCache.CreateWorldVertex(n); + + // transform into world space + if ( m_VertexCache.IsVertexFlexed(n) ) + { + CachedPosNormTan_t* pFlexVert = m_VertexCache.GetFlexVertex(n); + R_StudioTransform( pFlexVert->m_Position, &vert.m_BoneWeights, pWorldVert->m_Position.AsVector3D() ); + R_StudioRotate( pFlexVert->m_Normal, &vert.m_BoneWeights, pWorldVert->m_Normal.AsVector3D() ); + Assert( pWorldVert->m_Normal.x >= -1.05f && pWorldVert->m_Normal.x <= 1.05f ); + Assert( pWorldVert->m_Normal.y >= -1.05f && pWorldVert->m_Normal.y <= 1.05f ); + Assert( pWorldVert->m_Normal.z >= -1.05f && pWorldVert->m_Normal.z <= 1.05f ); + } + else + { + R_StudioTransform( vert.m_vecPosition, &vert.m_BoneWeights, pWorldVert->m_Position.AsVector3D() ); + R_StudioRotate( vert.m_vecNormal, &vert.m_BoneWeights, pWorldVert->m_Normal.AsVector3D() ); + Assert( pWorldVert->m_Normal.x >= -1.05f && pWorldVert->m_Normal.x <= 1.05f ); + Assert( pWorldVert->m_Normal.y >= -1.05f && pWorldVert->m_Normal.y <= 1.05f ); + Assert( pWorldVert->m_Normal.z >= -1.05f && pWorldVert->m_Normal.z <= 1.05f ); + } + + // Don't bother to light in software when we've got vertex + pixel shaders. + meshBuilder.Position3fv( pWorldVert->m_Position.Base() ); + + if (useHWLighting) + { + meshBuilder.Normal3fv( pWorldVert->m_Normal.Base() ); + } + else + { + R_StudioEyeballNormal( peyeball, org, pWorldVert->m_Position.AsVector3D(), pWorldVert->m_Normal.AsVector3D() ); + + // This isn't really used, but since the meshbuilder checks for messed up + // normals, let's do this here in debug mode. 
+ // WRONGO YOU FRIGGIN IDIOT!!!!!!!!!! + // DX7 needs these for the flashlight. + meshBuilder.Normal3fv( pWorldVert->m_Normal.Base() ); + R_ComputeLightAtPoint3( pWorldVert->m_Position.AsVector3D(), pWorldVert->m_Normal.AsVector3D(), color ); + + unsigned char r = LinearToLightmap( color.x ); + unsigned char g = LinearToLightmap( color.y ); + unsigned char b = LinearToLightmap( color.z ); + + meshBuilder.Color4ub( r, g, b, a ); + } + + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + + // FIXME: For now, flexed hw-skinned meshes can only have one bone + // The data must exist in the 0th hardware matrix + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.AdvanceVertex(); + } + + meshBuilder.End(); + pMesh->Draw(); + + for (int k=0; k<pGroup->m_NumStrips; k++) + { + numTrianglesRendered += pGroup->m_pUniqueTris[k]; + } + + if ( m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame ) + { + pRenderContext->SetNumBoneWeights( 0 ); + pRenderContext->Bind( m_pMaterialTangentFrame ); + + CMeshBuilder meshBuilder; + pMesh = pRenderContext->GetDynamicMesh( false ); + meshBuilder.Begin( pMesh, MATERIAL_LINES, pGroup->m_NumVertices ); + + bool doFlex = true; + bool r_blend = false; + R_StudioSoftwareProcessMesh_Normals( pmesh, meshBuilder, pGroup->m_NumVertices, + pGroup->m_pGroupIndexToMeshIndex, lighting, doFlex, r_blend, m_pRC->m_Config.bDrawNormals, m_pRC->m_Config.bDrawTangentFrame ); + meshBuilder.End( ); + + pMesh->Draw(); + pRenderContext->Bind( pMaterial ); + } + } + + return numTrianglesRendered; +} + + + +//----------------------------------------------------------------------------- +// Draws a mesh +//----------------------------------------------------------------------------- +int 
CStudioRender::R_StudioDrawMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, studiomeshdata_t* pMeshData, + StudioModelLighting_t lighting, IMaterial *pMaterial, + ColorMeshInfo_t *pColorMeshes, int lod ) +{ + VPROF( "R_StudioDrawMesh" ); + + int numTrianglesRendered = 0; + + // Draw all the various mesh groups... + for ( int j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + + // Older models are merely flexed while new ones are also delta flexed + bool bIsFlexed = (pGroup->m_Flags & MESHGROUP_IS_FLEXED) != 0; + bool bIsDeltaFlexed = (pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED) != 0; + + // Take the static path for new flexed models on DX9 hardware + bool bFlexStatic = ( bIsDeltaFlexed && g_pMaterialSystemHardwareConfig->SupportsStreamOffset() ); + + // Use the hardware if the mesh is hw skinned and we can put flexes on another stream + // Otherwise, we gotta do some expensive locks + bool bIsHardwareSkinnedData = ( pGroup->m_Flags & MESHGROUP_IS_HWSKINNED ) != 0; + bool bShouldHardwareSkin = bIsHardwareSkinnedData && ( !bIsFlexed || bFlexStatic ) && + ( lighting != LIGHTING_SOFTWARE ); + + if ( bShouldHardwareSkin && !m_pRC->m_Config.bDrawNormals && !m_pRC->m_Config.bDrawTangentFrame && !m_pRC->m_Config.bWireframe ) + { + if ( !m_pRC->m_Config.bNoHardware ) + { + numTrianglesRendered += R_StudioDrawStaticMesh( pRenderContext, pmesh, pGroup, lighting, m_pRC->m_AlphaMod, pMaterial, lod, pColorMeshes ); + } + } + else + { + if ( !m_pRC->m_Config.bNoSoftware ) + { + numTrianglesRendered += R_StudioDrawDynamicMesh( pRenderContext, pmesh, pGroup, lighting, m_pRC->m_AlphaMod, pMaterial, lod ); + } + } + } + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Inserts translucent mesh into list +//----------------------------------------------------------------------------- +template< class T > +void InsertRenderable( int mesh, T val, 
int count, int* pIndices, T* pValList ) +{ + // Compute insertion point... + int i; + for ( i = count; --i >= 0; ) + { + if (val < pValList[i]) + break; + + // Shift down + pIndices[i + 1] = pIndices[i]; + pValList[i+1] = pValList[i]; + } + + // Insert at insertion point + ++i; + pValList[i] = val; + pIndices[i] = mesh; +} + + +//----------------------------------------------------------------------------- +// Sorts the meshes +//----------------------------------------------------------------------------- +int CStudioRender::SortMeshes( int* pIndices, IMaterial **ppMaterials, + short* pskinref, Vector const& vforward, Vector const& r_origin ) +{ + int numMeshes = 0; + if (m_bDrawTranslucentSubModels) + { +// float* pDist = (float*)_alloca( m_pSubModel->nummeshes * sizeof(float) ); + + // Sort each model piece by it's center, if it's translucent + for (int i = 0; i < m_pSubModel->nummeshes; ++i) + { + // Don't add opaque materials + mstudiomesh_t* pmesh = m_pSubModel->pMesh(i); + IMaterial *pMaterial = ppMaterials[pskinref[pmesh->material]]; + if( !pMaterial || !pMaterial->IsTranslucent() ) + continue; + + // FIXME: put the "center" of the mesh into delta +// Vector delta; +// VectorSubtract( delta, r_origin, delta ); +// float dist = DotProduct( delta, vforward ); + + // Add it to our lists +// InsertRenderable( i, dist, numMeshes, pIndices, pDist ); + + // One more mesh + ++numMeshes; + } + } + else + { + IMaterial** ppMat = (IMaterial**)_alloca( m_pSubModel->nummeshes * sizeof(IMaterial*) ); + + // Sort by material type + for (int i = 0; i < m_pSubModel->nummeshes; ++i) + { + mstudiomesh_t* pmesh = m_pSubModel->pMesh(i); + IMaterial *pMaterial = ppMaterials[pskinref[pmesh->material]]; + if( !pMaterial ) + continue; + + // Don't add translucent materials + if (( !m_pRC->m_Config.bWireframe ) && pMaterial->IsTranslucent() ) + continue; + + // Add it to our lists + InsertRenderable( i, pMaterial, numMeshes, pIndices, ppMat ); + + // One more mesh + ++numMeshes; + } 
+ } + + return numMeshes; +} + +//----------------------------------------------------------------------------- +// R_StudioDrawPoints +// +// Returns the number of triangles rendered. +//----------------------------------------------------------------------------- +#pragma warning (disable:4189) +int CStudioRender::R_StudioDrawPoints( IMatRenderContext *pRenderContext, int skin, void /*IClientEntity*/ *pClientEntity, + IMaterial **ppMaterials, int *pMaterialFlags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes ) +{ + VPROF( "R_StudioDrawPoints" ); + int i; + int numTrianglesRendered = 0; + +#if 0 // garymcthack + if ( m_pSubModel->numfaces == 0 ) + return 0; +#endif + + // happens when there's a model load failure + if ( m_pStudioMeshes == 0 ) + return 0; + + if ( m_pRC->m_Config.bWireframe && m_bDrawTranslucentSubModels ) + return 0; + + // ConDMsg("%d: %d %d\n", pimesh->numFaces, pimesh->numVertices, pimesh->numNormals ); + if ( m_pRC->m_Config.skin ) + { + skin = m_pRC->m_Config.skin; + if ( skin >= m_pStudioHdr->numskinfamilies ) + { + skin = 0; + } + } + + // get skinref array + short *pskinref = m_pStudioHdr->pSkinref( 0 ); + if ( skin > 0 && skin < m_pStudioHdr->numskinfamilies ) + { + pskinref += ( skin * m_pStudioHdr->numskinref ); + } + + // FIXME: Activate sorting on a mesh level +// int* pIndices = (int*)_alloca( m_pSubModel->nummeshes * sizeof(int) ); +// int numMeshes = SortMeshes( pIndices, ppMaterials, pskinref, vforward, r_origin ); + + // draw each mesh + for ( i = 0; i < m_pSubModel->nummeshes; ++i) + { + mstudiomesh_t *pmesh = m_pSubModel->pMesh(i); + studiomeshdata_t *pMeshData = &m_pStudioMeshes[pmesh->meshid]; + Assert( pMeshData ); + + if ( !pMeshData->m_NumGroup ) + continue; + + if ( !pMaterialFlags ) + continue; + + StudioModelLighting_t lighting = LIGHTING_HARDWARE; + int materialFlags = pMaterialFlags[pskinref[pmesh->material]]; + + IMaterial* pMaterial = R_StudioSetupSkinAndLighting( pRenderContext, pskinref[ pmesh->material ], 
ppMaterials, materialFlags, pClientEntity, pColorMeshes, lighting ); + if ( !pMaterial ) + continue; + +#ifdef _DEBUG + char const *materialName = pMaterial->GetName(); +#endif + // Set up flex data + m_VertexCache.SetMesh( i ); + + // The following are special cases that can't be covered with + // the normal static/dynamic methods due to optimization reasons + switch ( pmesh->materialtype ) + { + case 1: + // eyeballs + numTrianglesRendered += R_StudioDrawEyeball( pRenderContext, pmesh, pMeshData, lighting, pMaterial, lod ); + break; + + default: + numTrianglesRendered += R_StudioDrawMesh( pRenderContext, pmesh, pMeshData, lighting, pMaterial, pColorMeshes, lod ); + break; + } + } + + // Reset this state so it doesn't hose other parts of rendering + pRenderContext->SetNumBoneWeights( 0 ); + + return numTrianglesRendered; +} +#pragma warning (default:4189) diff --git a/studiorender/r_studiodraw_computeflexedvertex.cpp b/studiorender/r_studiodraw_computeflexedvertex.cpp new file mode 100644 index 0000000..b58b90d --- /dev/null +++ b/studiorender/r_studiodraw_computeflexedvertex.cpp @@ -0,0 +1,1621 @@ +//========= Copyright c 1996-2008, Valve Corporation, All rights reserved. 
============// + +#include "tier0/platform.h" + +#ifdef PLATFORM_WINDOWS + +#include "studiorender.h" +#include "studio.h" +#include "materialsystem/imesh.h" +#include "materialsystem/imaterialsystemhardwareconfig.h" +#include "materialsystem/imaterialvar.h" +#include "materialsystem/imorph.h" +#include "materialsystem/itexture.h" +#include "materialsystem/imaterial.h" +#include "optimize.h" +#include "mathlib/mathlib.h" +#include "mathlib/vector.h" +#include <malloc.h> +#include "mathlib/vmatrix.h" +#include "studiorendercontext.h" +#include "tier2/tier2.h" +#include "tier0/vprof.h" +//#include "tier0/miniprofiler.h" +#include <algorithm> +#include "filesystem.h" + +#define PROFILE_THIS_FILE 0 + + +//DLL_IMPORT CLinkedMiniProfiler *g_pOtherMiniProfilers; +#if PROFILE_THIS_FILE + +#if !ENABLE_HARDWARE_PROFILER +#error "can't profile without profiler enabled" +#endif + +CLinkedMiniProfiler g_mp_morph_Vx("morph_Vx", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph_Vw("morph_Vw", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph_lower_bound("morph_lower_bound", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph("morph", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph_V1("morph_V1", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph_V2("morph_V2", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph_V3("morph_V3", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph_V4("morph_V4", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph_V5("morph_V5", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph_V6("morph_V6", &g_pOtherMiniProfilers); +CLinkedMiniProfiler g_mp_morph_V7("morph_V7", &g_pOtherMiniProfilers); + +CLinkedMiniProfiler* g_mp_ComputeFlexedVertex_StreamOffset[8] = +{ + NULL, + &g_mp_morph_V1, + &g_mp_morph_V2, + &g_mp_morph_V3, + &g_mp_morph_V4, + &g_mp_morph_V5, + &g_mp_morph_V6, + &g_mp_morph_V7 +}; +#else +uint32 g_mp_morph_Vx[2]; +uint32 g_mp_morph_Vw[2]; +#endif + +#ifdef _X360 +ConVar 
g_cv_morph_path("morph_path", "7"); +#ifdef _DEBUG +ConVar g_cv_morph_debug("morph_debug", "0"); +#endif // _DEBUG +#endif // _X360 + + +#ifdef _X360 +const ALIGN16 int32 g_perm_speed_side[4] = {0x12, 0x13, 0x12, 0x13}; +const ALIGN16 int32 g_perm_delta[4] = {0x14150000, 0x16170000, 0x18190000, 0}; +const ALIGN16 int32 g_perm_delta_wrinkle[4] = {0x14150000, 0x16170000, 0x18190000, 0x10110000}; // includes the f3PreDelta's W that's in the X component +const ALIGN16 int32 g_perm_ndelta[4] = {0x1A1B0000, 0x1C1D0000, 0x1E1F0000, 0}; +//const ALIGN16 int32 g_perm_w0[4] = {0x00010203,0x08090A0B,0x00010203,0x08090A0B}; +const ALIGN16 int32 g_perm_w1[4] = {0x0C0D0E0F,0x0C0D0E0F,0x04050607,0x04050607}; +const fltx4 g_sc256_255_special = {256.0f/255.0f,256.0f/255.0f,-256.0f/255.0f,-256.0f/255.0f}; +const fltx4 g_f40011 = {0,0,1,1}; +fltx4 g_dummy2[2]; + +int g_nStreamOffset_prefetch = 256; + + + + + + + + + + + + + + +// +// V4 rolled - latency of x4, manually scheduled for nearly optimal dual-issue and no automatic stalls +// the ~15 nops mean 1 instruction is issued at that cycle, instead of theoretically possible 2 per cycle +// +__declspec(naked) int ComputeFlexedVertex_StreamOffset_V7( + int nThinFlexVertexCount, //r3 + CachedPosNorm_t *pThinFlexVerts,//r4 + int32 *pFirstThinFlexIndex, //r5 + mstudiovertanim_t * pVert, //r6 + uint32 nCurrentTag, //r7 + uint32 numVertsToProcess, //r8 + fltx4 w1234 //vr1 + ) +{ + __asm + { + std r14, -0x08(r1) + std r15, -0x10(r1) + std r16, -0x18(r1) + std r17, -0x20(r1) + std r18, -0x28(r1) + std r19, -0x30(r1) + std r20, -0x38(r1) + std r21, -0x40(r1) + std r22, -0x48(r1) + std r23, -0x50(r1) + std r24, -0x58(r1) + std r25, -0x60(r1) + + // let the compiler schedule the instructions, just use several registers to avoid dependencies + lau r14, g_sc256_255_special + lal r14, r14, g_sc256_255_special + lvx vr2, r0,r14 + + lau r15, g_f40011 + lal r15, r15, g_f40011 + lvx vr3, r0,r15 + + lau r16, g_perm_speed_side + lal r16, r16, 
g_perm_speed_side + lvx vr4, r0,r16 + + lau r17, g_perm_delta + lal r17, r17, g_perm_delta + lvx vr5, r0,r17 + + lau r18, g_perm_ndelta + lal r18, r18, g_perm_ndelta + lvx vr6, r0,r18 + + lau r20, g_dummy2 + lal r20,r20, g_dummy2 + mr r21, r20 + mr r22, r21 + mr r23, r22 + + li r10, -1 + rldicl r7,r7,0,32 // currentTag &= 0xFFFFFFFF ; just to make sure we don't mess up isCacheInvalid computation + rldicl r10,r10,0,48 // r10 = 0x0000FFFF + + vxor vr8,vr8,vr8 + + li r15, 16 + + li r11,0x100 + li r24, MAXSTUDIOFLEXVERTS - 4 + + mtctr r8 + mftb r25 + vxor vr19,vr19,vr19 + vxor vr20,vr20,vr20 + nop // align! + nop + nop + +label_start_V7: // 52 instructions run in 45 cycles, although compiler predicts 38 cycles + //////////////// + // IMPORTANT: DO NOT REMOVE NOPS UNLESS YOU KNOW WHAT YOU ARE DOING AND WHY! + // nops are essential here, removing them will make the code about 2% slower because dual-issue will be broken + //////////////// + lhz r14, 0(r6) // int n = pVert->index; + addi r16, r3, 2 + dcbt r11,r6 + cmpw r3, r24 // compare nThinFlexVertexCount to MAXSTUDIOFLEXVERTS - 2 + lvlx vr9,r0,r6 + rldicl r14, r14, 2, 0 // r14 = n*4 + lvrx vr10,r15,r6 + rldicl r16, r16, 5, 0 // r16 = (nThinFlexVertexCount+2) * 32 + pThinFlexVerts + vor vr9,vr9,vr10 // vr9 = packedVert = LoadUnalignedSIMD(pVert) + addi r31,r31,0//vpermwi128 vr40,vr40,0x1B //mr r31,r31 + add r16, r16, r4 + vpermwi128 vr40,vr40,0x1B //mr r30,r30 + addi r6, r6, 0x10 // pVert++ + vpermwi128 vr41,vr41,0x1B//nop + lwzx r17, r14, r5 // r17 = oldCache + //addi r30,r30,0//nop + vperm vr10, vr8, vr9, vr4 + //addi r29,r29,0//nop + xor r18, r17, r7 // cacheVertexIndex = oldCache^nCurrentTag + vperm vr11, vr8, vr9, vr5 + stvx vr8, r0,r16 + /*S:2*/ vmsum4fp128 vr29,vr19, vr1 // vr29 = scWeight + subf r18,r18,r10 // (0xFFFF-cacheVertexIndex) >> 32 + /*S:1*/ vpermwi128 vr25, vr20, 0x22 // depends on vmadd vr20 = f4sb + stvx vr8, r15,r16 + /*S:1*/ vpermwi128 vr26, vr20, 0xF5 + vcsxwfp vr10,vr10,8 + or r19,r3,r7 + vperm 
vr12, vr8, vr9, vr6 + sradi r18,r18,32 // r18 = isCacheInvalid : form mask + /*S:3*/ stvx vr30, r0,r23 + //nop + /*S:3*/ stvx vr31, r15,r23 + //nop + andc r17, r17, r18 // r17 = oldCache & ~isCacheInvalid + //nop + subf r3, r18, r3 // nThinFlexVertexCount = nThinFlexVertexCount + (isCacheInvalid&1); + //nop + and r19,r19,r18 // r19 = newCache & isCacheInvalid + //nop + /*S:2*/mr r23,r22 + //nop + or r19, r19, r17 // r19 = updateCache + /*S:2*/ lvx vr13, r0,r22 // vr13 = vfPosition + /*S:2*/ lvx vr14, r15,r22 // vr14 = vfNormal + //nop + rldicl r17, r19, 5,43 // r17 = (updateCache & 0xFFFF) * 32 = nVertexIndex * 32 + //nop + /*S:1*/ vmulfp128 vr19, vr25, vr26 + /*S:1*/mr r22, r21 + vmaddfp vr20, vr10, vr2, vr3 // vr20 = f4sb + add r21, r17, r4 // r21 = pFlexedVertex, goes to Stage:1 + /*S:2*/ vmaddfp vr30, vr29, vr21, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + stwx r19, r14, r5 + /*S:2*/ vmaddfp vr31, vr29, vr22, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + /*S:1*/ vpermwi128 vr21, vr32, 0x1B + /*S:1*/ vpermwi128 vr22, vr33, 0x1B + vcsxwfp128 vr32, vr11, 28 + //nop + vcsxwfp128 vr33, vr12, 28 + bgt label_end_V7 + dcbt r11, r21 + bdnz label_start_V7 +label_end_V7: + + /*S:2*/ vmsum4fp128 vr29,vr19, vr1 // vr29 = scWeight + /*S:1*/ vpermwi128 vr25, vr20, 0x22 // depends on vmadd vr20 = f4sb + /*S:1*/ vpermwi128 vr26, vr20, 0xF5 + /*S:3*/ stvx vr30, r0,r23 + /*S:3*/ stvx vr31, r15,r23 + /*S:2*/mr r23,r22 + /*S:2*/ lvx vr13, r0,r22 // vr13 = vfPosition + /*S:2*/ lvx vr14, r15,r22 // vr14 = vfNormal + /*S:1*/ vmulfp128 vr19, vr25, vr26 + /*S:1*/mr r22, r21 + /*S:2*/ vmaddfp vr30, vr29, vr21, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + /*S:2*/ vmaddfp vr31, vr29, vr22, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + /*S:1*/ vpermwi128 vr21, vr32, 0x1B + /*S:1*/ vpermwi128 vr22, vr33, 0x1B + + + /*S:2*/ vmsum4fp128 vr29,vr19, vr1 // vr29 = scWeight + /*S:3*/ stvx vr30, r0,r23 + /*S:3*/ stvx vr31, r15,r23 + /*S:2*/mr r23,r22 + /*S:2*/ lvx vr13, r0,r22 // vr13 
= vfPosition + /*S:2*/ lvx vr14, r15,r22 // vr14 = vfNormal + /*S:2*/ vmaddfp vr30, vr29, vr21, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + /*S:2*/ vmaddfp vr31, vr29, vr22, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + + /*S:3*/ stvx vr30, r0,r23 + /*S:3*/ stvx vr31, r15,r23 + + mftb r17 + subf r17, r25, r17 + lau r18, g_mp_morph_Vx + lal r18, r18, g_mp_morph_Vx + lwz r23, 0(r18) + add r23,r23,r17 + stw r23, 0(r18) + lwz r23, 4(r18) + add r23,r23,r8 + stw r23, 4(r18) + + ld r14, -0x08(r1) + ld r15, -0x10(r1) + ld r16, -0x18(r1) + ld r17, -0x20(r1) + ld r18, -0x28(r1) + ld r19, -0x30(r1) + ld r20, -0x38(r1) + ld r21, -0x40(r1) + ld r22, -0x48(r1) + ld r23, -0x50(r1) + ld r24, -0x58(r1) + ld r25, -0x60(r1) + + blr + } +} + + + + +__declspec(naked) int ComputeFlexedVertexWrinkle_StreamOffset_V7( + int nThinFlexVertexCount, //r3 + CachedPosNorm_t *pThinFlexVerts,//r4 + int32 *pFirstThinFlexIndex, //r5 + mstudiovertanim_wrinkle_t * pVert, //r6 + uint32 nCurrentTag, //r7 + uint32 numVertsToProcess, //r8 + fltx4 w1234 //vr1 + ) +{ + __asm + { + std r14, -0x08(r1) + std r15, -0x10(r1) + std r16, -0x18(r1) + std r17, -0x20(r1) + std r18, -0x28(r1) + std r19, -0x30(r1) + std r20, -0x38(r1) + std r21, -0x40(r1) + std r22, -0x48(r1) + std r23, -0x50(r1) + std r24, -0x58(r1) + std r25, -0x60(r1) + + // let the compiler schedule the instructions, just use several registers to avoid dependencies + lau r14, g_sc256_255_special + lal r14, r14, g_sc256_255_special + lvx vr2, r0,r14 + + lau r15, g_f40011 + lal r15, r15, g_f40011 + lvx vr3, r0,r15 + + lau r16, g_perm_speed_side + lal r16, r16, g_perm_speed_side + lvx vr4, r0,r16 + + lau r17, g_perm_delta_wrinkle + lal r17, r17, g_perm_delta_wrinkle + lvx vr5, r0,r17 + + lau r18, g_perm_ndelta + lal r18, r18, g_perm_ndelta + lvx vr6, r0,r18 + + lau r20, g_dummy2 + lal r20,r20, g_dummy2 + mr r21, r20 + mr r22, r21 + mr r23, r22 + + li r10, -1 + rldicl r7,r7,0,32 // currentTag &= 0xFFFFFFFF ; just to make sure we don't mess up 
isCacheInvalid computation + rldicl r10,r10,0,48 // r10 = 0x0000FFFF + + vxor vr8,vr8,vr8 + + li r15, 16 + + li r11,0x100 + li r24, MAXSTUDIOFLEXVERTS - 4 + + mtctr r8 + mftb r25 + vxor vr19,vr19,vr19 + vxor vr20,vr20,vr20 + nop // align! + nop + nop + +label_start_V7: // 52 instructions run in 45 cycles, although compiler predicts 38 cycles + //////////////// + // IMPORTANT: DO NOT REMOVE NOPS UNLESS YOU KNOW WHAT YOU ARE DOING AND WHY! + // nops are essential here, removing them will make the code about 2% slower because dual-issue will be broken + //////////////// + lhz r14, 0(r6) // int n = pVert->index; + addi r16, r3, 2 + dcbt r11,r6 + cmpw r3, r24 // compare nThinFlexVertexCount to MAXSTUDIOFLEXVERTS - 2 + lvlx vr9,r0,r6 + rldicl r14, r14, 2, 0 // r14 = n*4 + lvrx vr10,r15,r6 + rldicl r16, r16, 5, 0 // r16 = (nThinFlexVertexCount+2) * 32 + pThinFlexVerts + lvlx vr27,r15,r6 // f3PreDelta + vor vr9,vr9,vr10 // vr9 = packedVert = LoadUnalignedSIMD(pVert) + addi r31,r31,0//vpermwi128 vr40,vr40,0x1B //mr r31,r31 + add r16, r16, r4 + vpermwi128 vr40,vr40,0x1B //mr r30,r30 + addi r6, r6, 0x12 // pVert++ + vpermwi128 vr41,vr41,0x1B//nop + lwzx r17, r14, r5 // r17 = oldCache + //addi r30,r30,0//nop + vperm vr10, vr8, vr9, vr4 //__vperm(f4Zero, packedVert, permuteSpeedSide) + vrlimi128 vr27,vr9,7,0// f3PreDelta + xor r18, r17, r7 // cacheVertexIndex = oldCache^nCurrentTag + vperm vr12, vr8, vr9, vr6 //f3NDelta = __vperm(f4Zero, packedVert, permuteNDelta) + stvx vr8, r0,r16 + /*S:2*/ vmsum4fp128 vr29,vr19, vr1 // vr29 = scWeight + subf r18,r18,r10 // (0xFFFF-cacheVertexIndex) >> 32 + /*S:1*/ vpermwi128 vr25, vr20, 0x22 // depends on vmadd vr20 = f4sb + stvx vr8, r15,r16 + /*S:1*/ vpermwi128 vr26, vr20, 0xF5 + vcsxwfp vr10,vr10,8 + or r19,r3,r7 + vperm vr11, vr8, vr27, vr5 //f3Delta = __vperm(f4Zero, f3PreDelta, permuteDelta) + sradi r18,r18,32 // r18 = isCacheInvalid : form mask + /*S:3*/ stvx vr30, r0,r23 + //nop + /*S:3*/ stvx vr31, r15,r23 + //nop + andc r17, r17, 
r18 // r17 = oldCache & ~isCacheInvalid + //nop + subf r3, r18, r3 // nThinFlexVertexCount = nThinFlexVertexCount + (isCacheInvalid&1); + //nop + and r19,r19,r18 // r19 = newCache & isCacheInvalid + //nop + /*S:2*/mr r23,r22 + //nop + or r19, r19, r17 // r19 = updateCache + /*S:2*/ lvx vr13, r0,r22 // vr13 = vfPosition + /*S:2*/ lvx vr14, r15,r22 // vr14 = vfNormal + //nop + rldicl r17, r19, 5,43 // r17 = (updateCache & 0xFFFF) * 32 = nVertexIndex * 32 + //nop + /*S:1*/ vmulfp128 vr19, vr25, vr26 + /*S:1*/mr r22, r21 + vmaddfp vr20, vr10, vr2, vr3 // vr20 = f4sb + add r21, r17, r4 // r21 = pFlexedVertex, goes to Stage:1 + /*S:2*/ vmaddfp vr30, vr29, vr21, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + stwx r19, r14, r5 + /*S:2*/ vmaddfp vr31, vr29, vr22, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + /*S:1*/ vpermwi128 vr21, vr32, 0x1B + /*S:1*/ vpermwi128 vr22, vr33, 0x1B + vcsxwfp128 vr32, vr11, 28 + //nop + vcsxwfp128 vr33, vr12, 28 + bgt label_end_V7 + dcbt r11, r21 + bdnz label_start_V7 +label_end_V7: + + /*S:2*/ vmsum4fp128 vr29,vr19, vr1 // vr29 = scWeight + /*S:1*/ vpermwi128 vr25, vr20, 0x22 // depends on vmadd vr20 = f4sb + /*S:1*/ vpermwi128 vr26, vr20, 0xF5 + /*S:3*/ stvx vr30, r0,r23 + /*S:3*/ stvx vr31, r15,r23 + /*S:2*/mr r23,r22 + /*S:2*/ lvx vr13, r0,r22 // vr13 = vfPosition + /*S:2*/ lvx vr14, r15,r22 // vr14 = vfNormal + /*S:1*/ vmulfp128 vr19, vr25, vr26 + /*S:1*/mr r22, r21 + /*S:2*/ vmaddfp vr30, vr29, vr21, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + /*S:2*/ vmaddfp vr31, vr29, vr22, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + /*S:1*/ vpermwi128 vr21, vr32, 0x1B + /*S:1*/ vpermwi128 vr22, vr33, 0x1B + + + /*S:2*/ vmsum4fp128 vr29,vr19, vr1 // vr29 = scWeight + /*S:3*/ stvx vr30, r0,r23 + /*S:3*/ stvx vr31, r15,r23 + /*S:2*/mr r23,r22 + /*S:2*/ lvx vr13, r0,r22 // vr13 = vfPosition + /*S:2*/ lvx vr14, r15,r22 // vr14 = vfNormal + /*S:2*/ vmaddfp vr30, vr29, vr21, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + /*S:2*/ vmaddfp 
vr31, vr29, vr22, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + + /*S:3*/ stvx vr30, r0,r23 + /*S:3*/ stvx vr31, r15,r23 + + mftb r17 + subf r17, r25, r17 + lau r18, g_mp_morph_Vw + lal r18, r18, g_mp_morph_Vw + lwz r23, 0(r18) + add r23,r23,r17 + stw r23, 0(r18) + lwz r23, 4(r18) + add r23,r23,r8 + stw r23, 4(r18) + + ld r14, -0x08(r1) + ld r15, -0x10(r1) + ld r16, -0x18(r1) + ld r17, -0x20(r1) + ld r18, -0x28(r1) + ld r19, -0x30(r1) + ld r20, -0x38(r1) + ld r21, -0x40(r1) + ld r22, -0x48(r1) + ld r23, -0x50(r1) + ld r24, -0x58(r1) + ld r25, -0x60(r1) + + blr + } +} + + + + +// V4 rolled - latency of x3 +__declspec(naked) int ComputeFlexedVertex_StreamOffset_V6( + int nThinFlexVertexCount, //r3 + CachedPosNorm_t *pThinFlexVerts,//r4 + int32 *pFirstThinFlexIndex, //r5 + mstudiovertanim_t * pVert, //r6 + uint32 nCurrentTag, //r7 + uint32 numVertsToProcess, //r8 + fltx4 w1234 //vr1 + ) +{ + __asm + { + std r14, -0x08(r1) + std r15, -0x10(r1) + std r16, -0x18(r1) + std r17, -0x20(r1) + std r18, -0x28(r1) + std r19, -0x30(r1) + std r20, -0x38(r1) + std r21, -0x40(r1) + std r22, -0x48(r1) + std r23, -0x50(r1) + std r24, -0x58(r1) + + // let the compiler schedule the instructions, just use several registers to avoid dependencies + lau r14, g_sc256_255_special + lal r14, r14, g_sc256_255_special + lvx vr2, r0,r14 + + lau r15, g_f40011 + lal r15, r15, g_f40011 + lvx vr3, r0,r15 + + lau r16, g_perm_speed_side + lal r16, r16, g_perm_speed_side + lvx vr4, r0,r16 + + lau r17, g_perm_delta + lal r17, r17, g_perm_delta + lvx vr5, r0,r17 + + lau r18, g_perm_ndelta + lal r18, r18, g_perm_ndelta + lvx vr6, r0,r18 + + lau r20, g_dummy2 + lal r20,r20, g_dummy2 + mr r21, r20 + mr r22, r21 + + li r10, -1 + rldicl r7,r7,0,32 // currentTag &= 0xFFFFFFFF ; just to make sure we don't mess up isCacheInvalid computation + rldicl r10,r10,0,48 // r10 = 0x0000FFFF + + vxor vr8,vr8,vr8 + + li r15, 16 + + lau r14,g_nStreamOffset_prefetch + lal r14,r14,g_nStreamOffset_prefetch + lwz r11,0(r14) + 
+ li r24, MAXSTUDIOFLEXVERTS - 2 + + mtctr r8 + mftb r23 + +label_start: + lhz r14, 0(r6) // int n = pVert->index; + dcbt r11,r6 + addi r16, r3, 2 + cmpw r3, r24 // compare nThinFlexVertexCount to MAXSTUDIOFLEXVERTS - 2 + lvlx vr9,r0,r6 + lvrx vr10,r15,r6 + rldicl r14, r14, 2, 0 // r14 = n*4 + rldicl r16, r16, 5, 0 // r16 = (nThinFlexVertexCount+2) * 32 + pThinFlexVerts + add r16, r16, r4 + vor vr9,vr9,vr10 // vr9 = packedVert = LoadUnalignedSIMD(pVert) + stvx vr8, r0,r16 + lwzx r17, r14, r5 // r17 = oldCache + stvx vr8, r15,r16 + vmsum4fp128 vr19,vr19, vr1 // vr15 = scWeight + vperm vr10, vr8, vr9, vr4 + xor r18, r17, r7 // cacheVertexIndex = oldCache^nCurrentTag + vperm vr11, vr8, vr9, vr5 + subf r18,r18,r10 // (0xFFFF-cacheVertexIndex) >> 32 + vcsxwfp vr10,vr10,8 + vperm vr12, vr8, vr9, vr6 + stvx vr23, r0,r22 + sradi r18,r18,32 // r18 = isCacheInvalid : form mask + vmaddfp vr10, vr10, vr2, vr3 // vr10 = f4sb + stvx vr24, r15,r22 + or r19,r3,r7 + andc r17, r17, r18 // r17 = oldCache & ~isCacheInvalid + and r19,r19,r18 // r19 = newCache & isCacheInvalid + vpermwi128 vr15, vr10, 0x22 + or r19, r19, r17 // r19 = updateCache + vpermwi128 vr16, vr10, 0xF5 + rldicl r17, r19, 5,43 // r17 = (updateCache & 0xFFFF) * 32 = nVertexIndex * 32 + vmaddfp vr24, vr19, vr22, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + vmaddfp vr23, vr19, vr21, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + vmulfp128 vr19, vr15, vr16 + add r17, r17, r4 // r17 = pFlexedVertex + stwx r19, r14, r5 + subf r3, r18, r3// nThinFlexVertexCount = nThinFlexVertexCount + (isCacheInvalid&1); + lvx vr13, r0,r17 // vr13 = vfPosition + addi r6, r6, 0x10 // pVert++ + lvx vr14, r15,r17 // vr14 = vfNormal + vcsxwfp vr21, vr11, 28 + mr r22,r21 + vcsxwfp vr22, vr12, 28 + mr r21,r17 + bgt label_end + dcbt r11, r17 + + bdnz label_start +label_end: + + mftb r17 + subf r17, r23, r17 + lau r18, g_mp_morph_Vx + lal r18, r18, g_mp_morph_Vx + lwz r23, 0(r18) + add r23,r23,r17 + stw r23, 0(r18) + lwz r23, 4(r18) + add 
r23,r23,r8 + stw r23, 4(r18) + + + vmsum4fp128 vr19,vr19, vr1 // vr15 = scWeight + stvx vr23, r0,r22 + stvx vr24, r15,r22 + vmaddfp vr24, vr19, vr22, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + vmaddfp vr23, vr19, vr21, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + stvx vr23, r0,r21 + stvx vr24, r15,r21 + + ld r14, -0x08(r1) + ld r15, -0x10(r1) + ld r16, -0x18(r1) + ld r17, -0x20(r1) + ld r18, -0x28(r1) + ld r19, -0x30(r1) + ld r20, -0x38(r1) + ld r21, -0x40(r1) + ld r22, -0x48(r1) + ld r23, -0x50(r1) + ld r24, -0x58(r1) + + blr + } +} + + + +// 2-stages +__declspec(naked) int ComputeFlexedVertex_StreamOffset_V5( + int nThinFlexVertexCount, //r3 + CachedPosNorm_t *pThinFlexVerts,//r4 + int32 *pFirstThinFlexIndex, //r5 + mstudiovertanim_t * pVert, //r6 + uint32 nCurrentTag, //r7 + uint32 numVertsToProcess, //r8 + fltx4 w1234 //vr1 + ) +{ + __asm + { + std r14, -0x08(r1) + std r15, -0x10(r1) + std r16, -0x18(r1) + std r17, -0x20(r1) + std r18, -0x28(r1) + std r19, -0x30(r1) + std r20, -0x38(r1) + + // let the compiler schedule the instructions, just use several registers to avoid dependencies + lau r14, g_sc256_255_special + lal r14, r14, g_sc256_255_special + lvx vr2, r0,r14 + + lau r15, g_f40011 + lal r15, r15, g_f40011 + lvx vr3, r0,r15 + + lau r16, g_perm_speed_side + lal r16, r16, g_perm_speed_side + lvx vr4, r0,r16 + + lau r17, g_perm_delta + lal r17, r17, g_perm_delta + lvx vr5, r0,r17 + + lau r18, g_perm_ndelta + lal r18, r18, g_perm_ndelta + lvx vr6, r0,r18 + + lau r20, g_dummy2 + lal r20,r20, g_dummy2 + + vxor vr8,vr8,vr8 + li r10, -1 + rldicl r7,r7,0,32 // currentTag &= 0xFFFFFFFF ; just to make sure we don't mess up isCacheInvalid computation + rldicl r10,r10,0,48 // r10 = 0x0000FFFF + mtctr r8 + + li r15, 16 + +label_start_schlp: + lhz r14, 0(r6) // int n = pVert->index; + addi r16, r3, 2 // r16 = (nThinFlexVertexCount+2) * 32 + pThinFlexVerts + lvlx vr9,r0,r6 + rldicl r14, r14, 2, 0 // r14 = n*4 + lvrx vr10,r15,r6 + rldicl r16, r16, 5, 0 // r16 = 
(nThinFlexVertexCount+2) * 32 + pThinFlexVerts + + vor vr9,vr9,vr10 // vr9 = packedVert = LoadUnalignedSIMD(pVert) + + add r16, r16, r4 + + vperm vr10, vr8, vr9, vr4 //__vperm(f4Zero, packedVert, permuteSpeedSide) + addi r6, r6, 0x10 // pVert++ + vcsxwfp vr10,vr10,8 + + vmaddfp vr17, vr15, vr11, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) - stage 1 + vmaddfp vr18, vr15, vr12, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) - stage 1 + + vperm vr11, vr8, vr9, vr5 //f3Delta = __vperm(f4Zero, packedVert, permuteDelta) + vcsxwfp vr11, vr11, 28 + vperm vr12, vr8, vr9, vr6 //f3NDelta = __vperm(f4Zero, packedVert, permuteNDelta) + vcsxwfp vr12, vr12, 28 + + vmaddfp vr10, vr10, vr2, vr3 // vr10 = f4sb + + lwzx r17, r14, r5 // r17 = oldCache + xor r18, r17, r7 // cacheVertexIndex = oldCache^nCurrentTag + subf r18,r18,r10 // (0xFFFF-cacheVertexIndex) >> 32 + + or r19,r3,r7 // newCache = nCurrentTag | nThinFlexVertexCount + sradi r18,r18,32 // r18 = isCacheInvalid : form mask + vpermwi128 vr15, vr10, 0x22 + and r19,r19,r18 // r19 = newCache & isCacheInvalid + vpermwi128 vr16, vr10, 0xF5 + andc r17, r17, r18 // r17 = oldCache & ~isCacheInvalid + stvx vr8, r0, r16 + or r19, r19, r17 // r19 = updateCache + stvx vr8, r15, r16 + + rldicl r17, r19, 5,43 // r17 = (updateCache & 0xFFFF) * 32 = nVertexIndex * 32 + add r17, r17, r4 // r17 = pFlexedVertex + vmulfp128 vr15, vr15, vr16 + lvx vr13, r0,r17 // vr13 = vfPosition + lvx vr14, r15,r17 // vr14 = vfNormal + + vmsum4fp128 vr15,vr15, vr1 // vr15 = scWeight + + stwx r19, r14, r5 // pFirstThinFlexIndex[n] = updateCache + subf r3, r18, r3// nThinFlexVertexCount = nThinFlexVertexCount + (isCacheInvalid&1); + + stvx vr17, r0,r20 // stage 1 + stvx vr18, r15,r20 // stage 1 + + mr r20, r17 + + bdnz label_start_schlp + + vmaddfp vr17, vr15, vr11, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) - stage 1 + vmaddfp vr18, vr15, vr12, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) - stage 1 + stvx vr17, r0,r20 // stage 1; deferred storing saves 
15 cycles (10%!) + stvx vr18, r15,r20 + + ld r14, -0x08(r1) + ld r15, -0x10(r1) + ld r16, -0x18(r1) + ld r17, -0x20(r1) + ld r18, -0x28(r1) + ld r19, -0x30(r1) + ld r20, -0x38(r1) + + blr + } +} + +// V3 in asm +__declspec(naked) int ComputeFlexedVertex_StreamOffset_V4( + int nThinFlexVertexCount, //r3 + CachedPosNorm_t *pThinFlexVerts,//r4 + int32 *pFirstThinFlexIndex, //r5 + mstudiovertanim_t * pVert, //r6 + uint32 nCurrentTag, //r7 + uint32 numVertsToProcess, //r8 + fltx4 w1234 //vr1 + ) +{ + __asm + { + std r14, -0x08(r1) + std r15, -0x10(r1) + std r16, -0x18(r1) + std r17, -0x20(r1) + std r18, -0x28(r1) + std r19, -0x30(r1) + + // let the compiler schedule the instructions, just use several registers to avoid dependencies + lau r14, g_sc256_255_special + lal r14, r14, g_sc256_255_special + lvx vr2, r0,r14 + + lau r15, g_f40011 + lal r15, r15, g_f40011 + lvx vr3, r0,r15 + + lau r16, g_perm_speed_side + lal r16, r16, g_perm_speed_side + lvx vr4, r0,r16 + + lau r17, g_perm_delta + lal r17, r17, g_perm_delta + lvx vr5, r0,r17 + + lau r18, g_perm_ndelta + lal r18, r18, g_perm_ndelta + lvx vr6, r0,r18 + + li r10, -1 + rldicl r7,r7,0,32 // currentTag &= 0xFFFFFFFF ; just to make sure we don't mess up isCacheInvalid computation + rldicl r10,r10,0,48 // r10 = 0x0000FFFF + + lau r14,g_nStreamOffset_prefetch + lal r14,r14,g_nStreamOffset_prefetch + lwz r11,0(r14) + + vxor vr8,vr8,vr8 + + li r15, 16 + li r24, MAXSTUDIOFLEXVERTS - 3 // critical number at which to stop processing + + mtctr r8 +label_start: + lhz r14, 0(r6) // int n = pVert->index; + dcbt r11,r16 + rldicl r14, r14, 2, 0 // r14 = n*4 + + + addi r16, r3, 2 + rldicl r16, r16, 5, 0 // r16 = (nThinFlexVertexCount+2) * 32 + pThinFlexVerts + add r16, r16, r4 + stvx vr8, r0,r16 + stvx vr8, r15,r16 + + lvlx vr9,r0,r6 + lvrx vr10,r15,r6 + vor vr9,vr9,vr10 // vr9 = packedVert = LoadUnalignedSIMD(pVert) + + vperm vr10, vr8, vr9, vr4 //__vperm(f4Zero, packedVert, permuteSpeedSide) + vcsxwfp vr10,vr10,8 + vmaddfp vr10, 
vr10, vr2, vr3 // vr10 = f4sb + + vperm vr11, vr8, vr9, vr5 //f3Delta = __vperm(f4Zero, packedVert, permuteDelta) + vcsxwfp vr11, vr11, 28 + vperm vr12, vr8, vr9, vr6 //f3NDelta = __vperm(f4Zero, packedVert, permuteNDelta) + vcsxwfp vr12, vr12, 28 + + lwzx r17, r14, r5 // r17 = oldCache + xor r18, r17, r7 // cacheVertexIndex = oldCache^nCurrentTag + subf r18,r18,r10 // (0xFFFF-cacheVertexIndex) >> 32 + sradi r18,r18,32 // r18 = isCacheInvalid : form mask + + or r19,r3,r7 // newCache = nCurrentTag | nThinFlexVertexCount + and r19,r19,r18 // r19 = newCache & isCacheInvalid + andc r17, r17, r18 // r17 = oldCache & ~isCacheInvalid + or r19, r19, r17 // r19 = updateCache + + rldicl r17, r19, 5,43 // r17 = (updateCache & 0xFFFF) * 32 = nVertexIndex * 32 + add r17, r17, r4 // r17 = pFlexedVertex + lvx vr13, r0,r17 // vr13 = vfPosition + lvx vr14, r15,r17 // vr14 = vfNormal + dcbt r11,r17 + + vpermwi128 vr15, vr10, 0x22 + vpermwi128 vr16, vr10, 0xF5 + vmulfp128 vr15, vr15, vr16 + vmsum4fp128 vr15,vr15, vr1 // vr15 = scWeight + + stwx r19, r14, r5 // pFirstThinFlexIndex[n] = updateCache + subf r3, r18, r3 // nThinFlexVertexCount = nThinFlexVertexCount + (isCacheInvalid&1); + + vmaddfp vr14, vr15, vr12, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + vmaddfp vr13, vr15, vr11, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + + stvx vr13, r0,r17 + stvx vr14, r15,r17 + + cmpw r3, r24 + bgt label_end + + addi r6, r6, 0x10 // pVert++ + bdnz label_start +label_end: + + ld r14, -0x08(r1) + ld r15, -0x10(r1) + ld r16, -0x18(r1) + ld r17, -0x20(r1) + ld r18, -0x28(r1) + ld r19, -0x30(r1) + + blr + } +} + + + +// V3 in asm +__declspec(naked) int ComputeFlexedVertexWrinkle_StreamOffset_V4( + int nThinFlexVertexCount, //r3 + CachedPosNorm_t *pThinFlexVerts,//r4 + int32 *pFirstThinFlexIndex, //r5 + mstudiovertanim_wrinkle_t * pVert,//r6 + uint32 nCurrentTag, //r7 + uint32 numVertsToProcess, //r8 + fltx4 w1234 //vr1 + ) +{ + __asm + { + std r14, -0x08(r1) + std r15, -0x10(r1) + std r16, 
-0x18(r1) + std r17, -0x20(r1) + std r18, -0x28(r1) + std r19, -0x30(r1) + + // let the compiler schedule the instructions, just use several registers to avoid dependencies + lau r14, g_sc256_255_special + lal r14, r14, g_sc256_255_special + lvx vr2, r0,r14 + + lau r15, g_f40011 + lal r15, r15, g_f40011 + lvx vr3, r0,r15 + + lau r16, g_perm_speed_side + lal r16, r16, g_perm_speed_side + lvx vr4, r0,r16 + + lau r17, g_perm_delta_wrinkle + lal r17, r17, g_perm_delta_wrinkle + lvx vr5, r0,r17 + + lau r18, g_perm_ndelta + lal r18, r18, g_perm_ndelta + lvx vr6, r0,r18 + + li r10, -1 + rldicl r7,r7,0,32 // currentTag &= 0xFFFFFFFF ; just to make sure we don't mess up isCacheInvalid computation + rldicl r10,r10,0,48 // r10 = 0x0000FFFF + + lau r14,g_nStreamOffset_prefetch + lal r14,r14,g_nStreamOffset_prefetch + lwz r11,0(r14) + + vxor vr8,vr8,vr8 + + li r15, 16 + li r24, MAXSTUDIOFLEXVERTS - 3 // critical number at which to stop processing + + mtctr r8 + label_start: + lhz r14, 0(r6) // int n = pVert->index; + dcbt r11,r16 + rldicl r14, r14, 2, 0 // r14 = n*4 + + + addi r16, r3, 2 + rldicl r16, r16, 5, 0 // r16 = (nThinFlexVertexCount+2) * 32 + pThinFlexVerts + add r16, r16, r4 + stvx vr8, r0,r16 + stvx vr8, r15,r16 + + lvlx vr27,r15,r6 // f3PreDelta + lvlx vr9,r0,r6 + lvrx vr10,r15,r6 + vor vr9,vr9,vr10 // vr9 = packedVert = LoadUnalignedSIMD(pVert) + vrlimi128 vr27,vr9,7,0// f3PreDelta + + vperm vr10, vr8, vr9, vr4 //__vperm(f4Zero, packedVert, permuteSpeedSide) + vcsxwfp vr10,vr10,8 + vmaddfp vr10, vr10, vr2, vr3 // vr10 = f4sb + + vperm vr11, vr8, vr27, vr5 //f3Delta = __vperm(f4Zero, f3PreDelta, permuteDelta) + vcsxwfp vr11, vr11, 28 + vperm vr12, vr8, vr9, vr6 //f3NDelta = __vperm(f4Zero, packedVert, permuteNDelta) + vcsxwfp vr12, vr12, 28 + + lwzx r17, r14, r5 // r17 = oldCache + xor r18, r17, r7 // cacheVertexIndex = oldCache^nCurrentTag + subf r18,r18,r10 // (0xFFFF-cacheVertexIndex) >> 32 + sradi r18,r18,32 // r18 = isCacheInvalid : form mask + + or r19,r3,r7 
// newCache = nCurrentTag | nThinFlexVertexCount + and r19,r19,r18 // r19 = newCache & isCacheInvalid + andc r17, r17, r18 // r17 = oldCache & ~isCacheInvalid + or r19, r19, r17 // r19 = updateCache + + rldicl r17, r19, 5,43 // r17 = (updateCache & 0xFFFF) * 32 = nVertexIndex * 32 + add r17, r17, r4 // r17 = pFlexedVertex + lvx vr13, r0,r17 // vr13 = vfPosition + lvx vr14, r15,r17 // vr14 = vfNormal + dcbt r11,r17 + + vpermwi128 vr15, vr10, 0x22 + vpermwi128 vr16, vr10, 0xF5 + vmulfp128 vr15, vr15, vr16 + vmsum4fp128 vr15,vr15, vr1 // vr15 = scWeight + + stwx r19, r14, r5 // pFirstThinFlexIndex[n] = updateCache + subf r3, r18, r3 // nThinFlexVertexCount = nThinFlexVertexCount + (isCacheInvalid&1); + + vmaddfp vr14, vr15, vr12, vr14 // MaddSIMD(scWeight,f3NDelta, vfNormal) + vmaddfp vr13, vr15, vr11, vr13 // MaddSIMD(scWeight,f3Delta, vfPosition) + + stvx vr13, r0,r17 + stvx vr14, r15,r17 + + cmpw r3, r24 + bgt label_end + + addi r6, r6, 0x12 // pVert++ + bdnz label_start + label_end: + + ld r14, -0x08(r1) + ld r15, -0x10(r1) + ld r16, -0x18(r1) + ld r17, -0x20(r1) + ld r18, -0x28(r1) + ld r19, -0x30(r1) + + blr + } +} + + + +// base for asm +int ComputeFlexedVertex_StreamOffset_V3(int nThinFlexVertexCount, CachedPosNorm_t *pThinFlexVerts, int32 *pFirstThinFlexIndex, mstudiovertanim_t * pVert, uint32 nCurrentTag, uint32 numVertsToProcess, fltx4 w1234) +{ + fltx4 sc256_255_special = g_sc256_255_special; + fltx4 f40011 = g_f40011; + fltx4 permuteSpeedSide = LoadAlignedSIMD((const float*)g_perm_speed_side); + fltx4 permuteDelta = LoadAlignedSIMD((const float*)g_perm_delta); + fltx4 permuteNDelta = LoadAlignedSIMD((const float*)g_perm_ndelta); + //fltx4 permuteW0 = LoadAlignedSIMD((const float*)g_perm_w0); + //fltx4 permuteW1 = LoadAlignedSIMD((const float*)g_perm_w1); + fltx4 f4Zero = Four_Zeros; + + do + { + int n = pVert->index; + pThinFlexVerts[nThinFlexVertexCount+2].m_Position.InitZero(); + pThinFlexVerts[nThinFlexVertexCount+2].m_Normal.InitZero(); + fltx4 
packedVert = LoadUnalignedSIMD((const float*)pVert); + fltx4 f4sb = MaddSIMD(__vcfsx(__vperm(f4Zero, packedVert, permuteSpeedSide), 8), sc256_255_special, f40011); + // f4sb = {s,b,1-s,1-b} + + fltx4 f3Delta = __vcfsx(__vperm(f4Zero, packedVert, permuteDelta), 12+16); + fltx4 f3NDelta = __vcfsx(__vperm(f4Zero, packedVert, permuteNDelta), 12+16); + uint64 oldCache = uint32(pFirstThinFlexIndex[n]); + uint64 cacheVertexIndex = oldCache^nCurrentTag; // if there is trash in high (2^16) bits, we need to update the cache + int64 isCacheInvalid = int64(0xFFFF-cacheVertexIndex)>>32; // the second shift must be arithmetic to form a valid mask + int64 isCacheValid = ~isCacheInvalid; + + int64 newCache = nCurrentTag | nThinFlexVertexCount; + int64 updateCache = (newCache & isCacheInvalid) | (oldCache & isCacheValid); + nThinFlexVertexCount = nThinFlexVertexCount - isCacheInvalid; + + int nVertexIndex = updateCache & 0xFFFF; + + CachedPosNorm_t *pFlexedVertex = pThinFlexVerts + nVertexIndex; // will be overridden + fltx4 vfNormal = LoadAlignedSIMD((float*)&pFlexedVertex->m_Normal); + fltx4 vfPosition = LoadAlignedSIMD((float*)&pFlexedVertex->m_Position); + + // here we need to form the following vector to compute final w: + // {s(1-b), (1-s)(1-b), sb, (1-s)b} + //fltx4 f4sbProd = MulSIMD(__vperm(f4sb,f4sb,permuteW0), __vperm(f4sb,f4sb,permuteW1)); + fltx4 f4sbProd = MulSIMD(__vpermwi(f4sb,0x22), __vpermwi(f4sb,0xF5)); + fltx4 scWeight = __vmsum4fp(f4sbProd,w1234); + + pFirstThinFlexIndex[n] = updateCache; + StoreAlignedSIMD((float*)&pFlexedVertex->m_Normal, MaddSIMD(scWeight,f3NDelta, vfNormal)); + StoreAlignedSIMD((float*)&pFlexedVertex->m_Position, MaddSIMD(scWeight,f3Delta, vfPosition)); + + pVert ++; + } + while(--numVertsToProcess); // why doesn't this use bdnz?? 
+ + return nThinFlexVertexCount; +} + + +// base for asm +int ComputeFlexedVertexWrinkle_StreamOffset_V3(int nThinFlexVertexCount, CachedPosNorm_t *pThinFlexVerts, int32 *pFirstThinFlexIndex, mstudiovertanim_wrinkle_t * pVert, uint32 nCurrentTag, uint32 numVertsToProcess, fltx4 w1234) +{ + fltx4 sc256_255_special = g_sc256_255_special; + fltx4 f40011 = g_f40011; + fltx4 permuteSpeedSide = LoadAlignedSIMD((const float*)g_perm_speed_side); + fltx4 permuteDelta = LoadAlignedSIMD((const float*)g_perm_delta_wrinkle); + fltx4 permuteNDelta = LoadAlignedSIMD((const float*)g_perm_ndelta); + //fltx4 permuteW0 = LoadAlignedSIMD((const float*)g_perm_w0); + //fltx4 permuteW1 = LoadAlignedSIMD((const float*)g_perm_w1); + fltx4 f4Zero = Four_Zeros; + + do + { + int n = pVert->index; + pThinFlexVerts[nThinFlexVertexCount+2].m_Position.InitZero(); + pThinFlexVerts[nThinFlexVertexCount+2].m_Normal.InitZero(); + fltx4 packedVert = LoadUnalignedSIMD((const float*)pVert); + fltx4 f3PreDelta = __lvlx(pVert, 16); // f3Delta now contains only packed W component in high X halfword... 
+ fltx4 f4sb = MaddSIMD(__vcfsx(__vperm(f4Zero, packedVert, permuteSpeedSide), 8), sc256_255_special, f40011); + // f4sb = {s,b,1-s,1-b} + + + f3PreDelta = __vrlimi(f3PreDelta, packedVert, 7, 0); // don't rotate and move bytes 4..15 from packed vert to f3PreDelta + fltx4 f3NDelta = __vcfsx(__vperm(f4Zero, packedVert, permuteNDelta), 12+16); + fltx4 f3Delta = __vcfsx(__vperm(f4Zero, f3PreDelta, permuteDelta), 12+16); + uint64 oldCache = uint32(pFirstThinFlexIndex[n]); + uint64 cacheVertexIndex = oldCache^nCurrentTag; // if there is trash in high (2^16) bits, we need to update the cache + int64 isCacheInvalid = int64(0xFFFF-cacheVertexIndex)>>32; // the second shift must be arithmetic to form a valid mask + int64 isCacheValid = ~isCacheInvalid; + + int64 newCache = nCurrentTag | nThinFlexVertexCount; + int64 updateCache = (newCache & isCacheInvalid) | (oldCache & isCacheValid); + nThinFlexVertexCount = nThinFlexVertexCount - isCacheInvalid; + + int nVertexIndex = updateCache & 0xFFFF; + + CachedPosNorm_t *pFlexedVertex = pThinFlexVerts + nVertexIndex; // will be overridden + fltx4 vfNormal = LoadAlignedSIMD((float*)&pFlexedVertex->m_Normal); + fltx4 vfPosition = LoadAlignedSIMD((float*)&pFlexedVertex->m_Position); + + // here we need to form the following vector to compute final w: + // {s(1-b), (1-s)(1-b), sb, (1-s)b} + //fltx4 f4sbProd = MulSIMD(__vperm(f4sb,f4sb,permuteW0), __vperm(f4sb,f4sb,permuteW1)); + fltx4 f4sbProd = MulSIMD(__vpermwi(f4sb,0x22), __vpermwi(f4sb,0xF5)); + fltx4 scWeight = __vmsum4fp(f4sbProd,w1234); + + pFirstThinFlexIndex[n] = updateCache; + StoreAlignedSIMD((float*)&pFlexedVertex->m_Normal, MaddSIMD(scWeight,f3NDelta, vfNormal)); + StoreAlignedSIMD((float*)&pFlexedVertex->m_Position, MaddSIMD(scWeight,f3Delta, vfPosition)); + + pVert ++; + } + while(--numVertsToProcess); // why doesn't this use bdnz?? 
+ + return nThinFlexVertexCount; +} + +// tried to pipeline in C++ +int ComputeFlexedVertex_StreamOffset_V2(int nThinFlexVertexCount, CachedPosNorm_t *pThinFlexVerts, int32 *pFirstThinFlexIndex, mstudiovertanim_t * pVert, uint32 nCurrentTag, uint32 numVertsToProcess, fltx4 w1234) +{ + Assert(0 == (uint32(pVert) & 0xF)); + fltx4 sc256_255_special = g_sc256_255_special; + fltx4 f40011 = g_f40011; + fltx4 permuteSpeedSide = LoadAlignedSIMD((const float*)g_perm_speed_side); + fltx4 permuteDelta = LoadAlignedSIMD((const float*)g_perm_delta); + fltx4 permuteNDelta = LoadAlignedSIMD((const float*)g_perm_ndelta); + //fltx4 permuteW0 = LoadAlignedSIMD((const float*)g_perm_w0); + //fltx4 permuteW1 = LoadAlignedSIMD((const float*)g_perm_w1); + fltx4 f4Zero = Four_Zeros; + + fltx4 f4sb_st1, f3Delta_st1, f3NDelta_st1; + int32 updateCache_st1; + mstudiovertanim_t *pVertEnd = pVert + numVertsToProcess; + { + // stage 0 + int n = pVert->index; + pThinFlexVerts[nThinFlexVertexCount+2].m_Position.InitZero(); + pThinFlexVerts[nThinFlexVertexCount+2].m_Normal.InitZero(); + fltx4 packedVert = LoadUnalignedSIMD((const float*)pVert); + fltx4 f4sb = MaddSIMD(__vcfsx(__vperm(f4Zero, packedVert, permuteSpeedSide), 8), sc256_255_special, f40011); // to be completely correct, we'll ned to multiply this with 256/255 + // f4sb = {s,b,1-s,1-b} + + fltx4 f3Delta = __vcfsx(__vperm(f4Zero, packedVert, permuteDelta), 12+16); + fltx4 f3NDelta = __vcfsx(__vperm(f4Zero, packedVert, permuteNDelta), 12+16); + uint64 oldCache = uint32(pFirstThinFlexIndex[n]); + uint64 cacheVertexIndex = oldCache^nCurrentTag; // if there is trash in high (2^16) bits, we need to update the cache + int64 isCacheInvalid = int64(0xFFFF-cacheVertexIndex)>>32; // the second shift must be arithmetic to form a valid mask + int64 isCacheValid = ~isCacheInvalid; + + int64 newCache = nCurrentTag | nThinFlexVertexCount; + int64 updateCache = (newCache & isCacheInvalid) | (oldCache & isCacheValid); + nThinFlexVertexCount = 
nThinFlexVertexCount - isCacheInvalid; + + pFirstThinFlexIndex[n] = updateCache; + + // prime next stage 1 + f4sb_st1 = f4sb; + f3Delta_st1 = f3Delta; + f3NDelta_st1 = f3NDelta; + updateCache_st1 = updateCache; + + pVert ++; + } + + while(pVert < pVertEnd) + { + // stage 1 + { + int nVertexIndex = updateCache_st1 & 0xFFFF; + + CachedPosNorm_t *pFlexedVertex = pThinFlexVerts + nVertexIndex; // will be overridden + + fltx4 vfNormal = LoadAlignedSIMD((float*)&pFlexedVertex->m_Normal); + fltx4 vfPosition = LoadAlignedSIMD((float*)&pFlexedVertex->m_Position); + + // here we need to form the following vector to compute final w: + // {s(1-b), (1-s)(1-b), sb, (1-s)b} + //fltx4 f4sbProd = MulSIMD(__vperm(f4sb_st1,f4sb_st1,permuteW0), __vperm(f4sb_st1,f4sb_st1,permuteW1)); + fltx4 f4sbProd = MulSIMD(__vpermwi(f4sb_st1,0x22), __vpermwi(f4sb_st1,0xF5)); + fltx4 scWeight = __vmsum4fp(f4sbProd,w1234); + + StoreAlignedSIMD((float*)&pFlexedVertex->m_Normal, MaddSIMD(scWeight,f3NDelta_st1, vfNormal)); + StoreAlignedSIMD((float*)&pFlexedVertex->m_Position, MaddSIMD(scWeight,f3Delta_st1, vfPosition)); + } + + // stage 0 + { + int n = pVert->index; + pThinFlexVerts[nThinFlexVertexCount+2].m_Position.InitZero(); + pThinFlexVerts[nThinFlexVertexCount+2].m_Normal.InitZero(); + fltx4 packedVert = LoadUnalignedSIMD((const float*)pVert); + fltx4 f4sb = MaddSIMD(__vcfsx(__vperm(f4Zero, packedVert, permuteSpeedSide), 8), sc256_255_special, f40011); // to be completely correct, we'll ned to multiply this with 256/255 + // f4sb = {s,b,1-s,1-b} + + fltx4 f3Delta = __vcfsx(__vperm(f4Zero, packedVert, permuteDelta), 12+16); + fltx4 f3NDelta = __vcfsx(__vperm(f4Zero, packedVert, permuteNDelta), 12+16); + uint64 oldCache = uint32(pFirstThinFlexIndex[n]); + uint64 cacheVertexIndex = oldCache^nCurrentTag; // if there is trash in high (2^16) bits, we need to update the cache + int64 isCacheInvalid = int64(0xFFFF-cacheVertexIndex)>>32; // the second shift must be arithmetic to form a valid mask + int64 
isCacheValid = ~isCacheInvalid; + + int64 newCache = nCurrentTag | nThinFlexVertexCount; + int64 updateCache = (newCache & isCacheInvalid) | (oldCache & isCacheValid); + nThinFlexVertexCount = nThinFlexVertexCount - isCacheInvalid; + + pFirstThinFlexIndex[n] = updateCache; // this may be put wherever it doesn't mess up the other stores + + // prime next stage 1 + f4sb_st1 = f4sb; + updateCache_st1 = updateCache; + f3Delta_st1 = f3Delta; + f3NDelta_st1 = f3NDelta; + } + + pVert ++; + } + + // stage 1 + { + int nVertexIndex = updateCache_st1 & 0xFFFF; + + CachedPosNorm_t *pFlexedVertex = pThinFlexVerts + nVertexIndex; // will be overridden + + fltx4 vfNormal = LoadAlignedSIMD((float*)&pFlexedVertex->m_Normal); + fltx4 vfPosition = LoadAlignedSIMD((float*)&pFlexedVertex->m_Position); + + // here we need to form the following vector to compute final w: + // {s(1-b), (1-s)(1-b), sb, (1-s)b} + //fltx4 f4sbProd = MulSIMD(__vperm(f4sb_st1,f4sb_st1,permuteW0), __vperm(f4sb_st1,f4sb_st1,permuteW1)); + fltx4 f4sbProd = MulSIMD(__vpermwi(f4sb_st1,0x22), __vpermwi(f4sb_st1,0xF5)); + fltx4 scWeight = __vmsum4fp(f4sbProd,w1234); + + StoreAlignedSIMD((float*)&pFlexedVertex->m_Normal, MaddSIMD(scWeight,f3NDelta_st1, vfNormal)); + StoreAlignedSIMD((float*)&pFlexedVertex->m_Position, MaddSIMD(scWeight,f3Delta_st1, vfPosition)); + } + return nThinFlexVertexCount; +} + +// branchless +int ComputeFlexedVertex_StreamOffset_V1(int nThinFlexVertexCount, CachedPosNorm_t *pThinFlexVerts, int32 *pFirstThinFlexIndex, mstudiovertanim_t * pVert, uint32 nCurrentTag, uint32 numVertsToProcess, fltx4 w1234) +{ + Assert(0 == (uint32(pVert) & 0xF)); + fltx4 sc256_255_special = g_sc256_255_special; + fltx4 f40011 = g_f40011; + fltx4 permuteSpeedSide = LoadAlignedSIMD((const float*)g_perm_speed_side); + fltx4 permuteDelta = LoadAlignedSIMD((const float*)g_perm_delta); + fltx4 permuteNDelta = LoadAlignedSIMD((const float*)g_perm_ndelta); + //fltx4 permuteW0 = LoadAlignedSIMD((const float*)g_perm_w0); + 
//fltx4 permuteW1 = LoadAlignedSIMD((const float*)g_perm_w1); + fltx4 f4Zero = Four_Zeros; + mstudiovertanim_t *pVertEnd = pVert + numVertsToProcess; + do + { + int n = pVert->index; + pThinFlexVerts[nThinFlexVertexCount].m_Position.InitZero(); + pThinFlexVerts[nThinFlexVertexCount].m_Normal.InitZero(); + fltx4 packedVert = LoadUnalignedSIMD((const float*)pVert); + fltx4 f4sb = MaddSIMD(__vcfsx(__vperm(f4Zero, packedVert, permuteSpeedSide), 8), sc256_255_special, f40011); + // f4sb = {s,b,1-s,1-b} + + fltx4 f3Delta = __vcfsx(__vperm(f4Zero, packedVert, permuteDelta), 12+16); + fltx4 f3NDelta = __vcfsx(__vperm(f4Zero, packedVert, permuteNDelta), 12+16); + uint64 oldCache = uint32(pFirstThinFlexIndex[n]); + uint64 cacheVertexIndex = oldCache^nCurrentTag; // if there is trash in high (2^16) bits, we need to update the cache + int64 isCacheInvalid = int64(0xFFFF-cacheVertexIndex)>>32; // the second shift must be arithmetic to form a valid mask + int32 isCacheValid = ~isCacheInvalid; + + int32 newCache = nCurrentTag | nThinFlexVertexCount; + int32 updateCache = (newCache & isCacheInvalid) | (oldCache & isCacheValid); + nThinFlexVertexCount = nThinFlexVertexCount - isCacheInvalid; + + int nVertexIndex = updateCache & 0xFFFF; + + CachedPosNorm_t *pFlexedVertex = pThinFlexVerts + nVertexIndex; // will be overridden + fltx4 vfNormal = LoadAlignedSIMD((float*)&pFlexedVertex->m_Normal); + fltx4 vfPosition = LoadAlignedSIMD((float*)&pFlexedVertex->m_Position); + + // here we need to form the following vector to compute final w: + // {s(1-b), (1-s)(1-b), sb, (1-s)b} + //fltx4 f4sbProd = MulSIMD(__vperm(f4sb,f4sb,permuteW0), __vperm(f4sb,f4sb,permuteW1)); + fltx4 f4sbProd = MulSIMD(__vpermwi(f4sb,0x22), __vpermwi(f4sb,0xF5)); + fltx4 scWeight = __vmsum4fp(f4sbProd,w1234); + + pFirstThinFlexIndex[n] = updateCache; + StoreAlignedSIMD((float*)&pFlexedVertex->m_Normal, MaddSIMD(scWeight,f3NDelta, vfNormal)); + StoreAlignedSIMD((float*)&pFlexedVertex->m_Position, 
MaddSIMD(scWeight,f3Delta, vfPosition)); + + pVert ++; + } + while(pVert < pVertEnd); // why doesn't this use CTR?? + + return nThinFlexVertexCount; +} + + +typedef int (*Fn_ComputeFlexedVertex_StreamOffset)(int nThinFlexVertexCount, CachedPosNorm_t *pThinFlexVerts, int32 *pFirstThinFlexIndex, mstudiovertanim_t * pVert, uint32 nCurrentTag, uint32 numVertsToProcess, fltx4 w1234); +Fn_ComputeFlexedVertex_StreamOffset g_fn_ComputeFlexedVertex_StreamOffset[8] = +{ + NULL, + ComputeFlexedVertex_StreamOffset_V1, + ComputeFlexedVertex_StreamOffset_V2, + ComputeFlexedVertex_StreamOffset_V3, + ComputeFlexedVertex_StreamOffset_V4, + ComputeFlexedVertex_StreamOffset_V5, + ComputeFlexedVertex_StreamOffset_V6, + ComputeFlexedVertex_StreamOffset_V7 +}; + +typedef int (*Fn_ComputeFlexedVertexWrinkle_StreamOffset)(int nThinFlexVertexCount, CachedPosNorm_t *pThinFlexVerts, int32 *pFirstThinFlexIndex, mstudiovertanim_wrinkle_t * pVert, uint32 nCurrentTag, uint32 numVertsToProcess, fltx4 w1234); +Fn_ComputeFlexedVertexWrinkle_StreamOffset g_fn_ComputeFlexedVertexWrinkle_StreamOffset[8] = +{ + NULL, + ComputeFlexedVertexWrinkle_StreamOffset_V3, + ComputeFlexedVertexWrinkle_StreamOffset_V3, + ComputeFlexedVertexWrinkle_StreamOffset_V3, + ComputeFlexedVertexWrinkle_StreamOffset_V4, + ComputeFlexedVertexWrinkle_StreamOffset_V4, + ComputeFlexedVertexWrinkle_StreamOffset_V4, + ComputeFlexedVertexWrinkle_StreamOffset_V7 +}; + + +inline float Diff(const CachedPosNorm_t&a, const CachedPosNorm_t&b) +{ + return a.m_Position.DistTo(b.m_Position) + a.m_Normal.DistTo(b.m_Normal); +} + +bool g_bBreakOnAssert = true; +void AlwaysAssert(bool mustBeTrue) +{ + if(!mustBeTrue) + { + Plat_DebugString("AlwaysAssert\n"); + if(g_bBreakOnAssert) + DebugBreak(); + } +} + +#endif + +template +void CCachedRenderData::ComputeFlexedVertex_StreamOffset<mstudiovertanim_t>( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, + mstudiovertanim_t *pvanim, int vertCount, float w1, float w2, float w3, float w4 ); +template 
+void CCachedRenderData::ComputeFlexedVertex_StreamOffset<mstudiovertanim_wrinkle_t>( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, + mstudiovertanim_wrinkle_t *pvanim, int vertCount, float w1, float w2, float w3, float w4 ); + +// vectorized +void CCachedRenderData::ComputeFlexedVertex_StreamOffset_Optimized( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, mstudiovertanim_t *pvanim, int vertCount, float w1, float w2, float w3, float w4 ) +{ +#if PROFILE_THIS_FILE + CMiniProfilerGuard mpguard(&g_mp_morph); +#endif +#ifdef _X360 + int nMorphPath = g_cv_morph_path.GetInt(); + if(nMorphPath) + { + mstudiovertanim_t vertCountStruct; + vertCountStruct.index = vertCount; + /*for(uint32 i = 1; i< pflex->numverts; ++i) + if(pvanim[i-1].index > pvanim[i].index) + DebugBreak();*/ + + mstudiovertanim_t * pVertEnd; + { +#if PROFILE_THIS_FILE + CMiniProfilerGuard mpguard_lower_bound(&g_mp_morph_lower_bound); +#endif + pVertEnd = std::lower_bound(pvanim, pvanim + pflex->numverts, vertCountStruct, mstudiovertanim_t::CSortByIndex()); + } + + if(pvanim < pVertEnd) + { + union + { + fltx4 f4; + float f1[4]; + } weights; + weights.f1[0] = w1; + weights.f1[1] = w2; + weights.f1[2] = w3; + weights.f1[3] = w4; + uint32 nCurrentTag = uint32(m_CurrentTag)<<16; + int nThinFlexVertexCount = m_ThinFlexVertexCount; + int32 *pFirstThinFlexIndex = (int32*)m_pFirstThinFlexIndex; + CachedPosNorm_t *pThinFlexVerts = m_pThinFlexVerts; + uint64 numVertsToProcess = pVertEnd - pvanim; + nMorphPath = MIN(7,nMorphPath); + + /*static int maxVertsSaved = 0; + if(numVertsToProcess > maxVertsSaved) + { + maxVertsSaved = numVertsToProcess; + + FileHandle_t fh = g_pFullFileSystem->Open( "vertices.bin", "wb" ); + if(fh != FILESYSTEM_INVALID_HANDLE) + { + g_pFullFileSystem->Write(pvanim, sizeof(*pvanim) * numVertsToProcess, fh); + g_pFullFileSystem->Close(fh); + } + }*/ + + +#ifdef _DEBUG + if(0 == g_cv_morph_debug.GetInt()) +#endif + { + for(uint32 i = 0; i < 2; ++i) // reset the first 2 positions here as it's 
required by the algorithm.. + { + pThinFlexVerts[nThinFlexVertexCount+i].m_Position.InitZero(); + pThinFlexVerts[nThinFlexVertexCount+i].m_Normal.InitZero(); + } + nThinFlexVertexCount = g_fn_ComputeFlexedVertex_StreamOffset[nMorphPath](nThinFlexVertexCount,pThinFlexVerts,pFirstThinFlexIndex,pvanim,nCurrentTag, numVertsToProcess, weights.f4); + } +#ifdef _DEBUG + else // Validation path inactive in release, since these static arrays consume 1MB + { + bool repeat = false; + static CachedPosNorm_t backupThinFlexVerts[MAXSTUDIOFLEXVERTS+1], checkThinFlexVerts[MAXSTUDIOFLEXVERTS+1]; + static CacheIndex_t backupFirstThinFlexIndex[MAXSTUDIOVERTS+1],checkFirstThinFlexIndex[MAXSTUDIOVERTS+1]; + int newThinFlexVertexCount ; + static int numRuns = 0; + ++numRuns; + memcpy(backupThinFlexVerts, m_pThinFlexVerts, sizeof(m_pThinFlexVerts)); + memcpy(backupFirstThinFlexIndex, m_pThinFlexIndex, sizeof(m_pThinFlexIndex)); + do + { + for(uint32 i = 0; i < 2; ++i) // reset the first 2 positions here as it's required by the algorithm.. 
+ { + pThinFlexVerts[nThinFlexVertexCount+i].m_Position.InitZero(); + pThinFlexVerts[nThinFlexVertexCount+i].m_Normal.InitZero(); + } + + newThinFlexVertexCount = g_fn_ComputeFlexedVertex_StreamOffset[nMorphPath](nThinFlexVertexCount,pThinFlexVerts,pFirstThinFlexIndex,pvanim,nCurrentTag, numVertsToProcess, weights.f4); + memcpy(checkThinFlexVerts, m_pThinFlexVerts, sizeof(m_pThinFlexVerts)); + memcpy(checkFirstThinFlexIndex, m_pThinFlexIndex, sizeof(m_pThinFlexIndex)); + memcpy(m_pThinFlexVerts, backupThinFlexVerts, sizeof(m_pThinFlexVerts)); + memcpy(m_pThinFlexIndex, backupFirstThinFlexIndex, sizeof(m_pThinFlexIndex)); + + ComputeFlexedVertex_StreamOffset( pStudioHdr, pflex, pvanim, vertCount, w1, w2, w3, w4); + AlwaysAssert(m_ThinFlexVertexCount == newThinFlexVertexCount); + for(int i = 0; i < newThinFlexVertexCount; ++i) + AlwaysAssert(Diff(checkThinFlexVerts[i], m_pThinFlexVerts[i]) < 1e-5f); + int indexOffset = m_pFirstThinFlexIndex - m_pThinFlexIndex; + for(int i = 0; i < numVertsToProcess; ++i) + AlwaysAssert(*(int*)&checkFirstThinFlexIndex[indexOffset + pvanim[i].index] == *(int*)&m_pThinFlexIndex[indexOffset + pvanim[i].index]); + + if(repeat) + { + m_ThinFlexVertexCount = nThinFlexVertexCount; + memcpy(m_pThinFlexVerts, backupThinFlexVerts, sizeof(m_pThinFlexVerts)); + memcpy(m_pThinFlexIndex, backupFirstThinFlexIndex, sizeof(m_pThinFlexIndex)); + } + } + while(repeat); + nThinFlexVertexCount = newThinFlexVertexCount; + } +#endif + m_ThinFlexVertexCount = nThinFlexVertexCount; + } + } + else +#endif + { + ComputeFlexedVertex_StreamOffset( pStudioHdr, pflex, pvanim, vertCount, w1, w2, w3, w4); + } +} + + +void CCachedRenderData::ComputeFlexedVertexWrinkle_StreamOffset_Optimized( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, mstudiovertanim_wrinkle_t *pvanim, int vertCount, float w1, float w2, float w3, float w4) +{ +#if PROFILE_THIS_FILE + CMiniProfilerGuard mpguard(&g_mp_morph); +#endif + +#ifdef _X360 + int nMorphPath = g_cv_morph_path.GetInt(); + 
if(nMorphPath) + { + mstudiovertanim_wrinkle_t vertCountStruct; + vertCountStruct.index = vertCount; + + mstudiovertanim_wrinkle_t * pVertEnd; + { +#if PROFILE_THIS_FILE + CMiniProfilerGuard mpguard_lower_bound(&g_mp_morph_lower_bound); +#endif + pVertEnd = std::lower_bound(pvanim, pvanim + pflex->numverts, vertCountStruct, mstudiovertanim_wrinkle_t::CSortByIndex()); + } + + if(pvanim < pVertEnd) + { + union + { + fltx4 f4; + float f1[4]; + } weights; + weights.f1[0] = w1; + weights.f1[1] = w2; + weights.f1[2] = w3; + weights.f1[3] = w4; + uint32 nCurrentTag = uint32(m_CurrentTag)<<16; + int nThinFlexVertexCount = m_ThinFlexVertexCount; + int32 *pFirstThinFlexIndex = (int32*)m_pFirstThinFlexIndex; + CachedPosNorm_t *pThinFlexVerts = m_pThinFlexVerts; + uint64 numVertsToProcess = pVertEnd - pvanim; + nMorphPath = MIN(7,nMorphPath); + +#ifdef _DEBUG + if(0 == g_cv_morph_debug.GetInt()) +#endif + { + for(uint32 i = 0; i < 2; ++i) // reset the first 2 positions here as it's required by the algorithm.. 
+ { + pThinFlexVerts[nThinFlexVertexCount+i].m_Position.InitZero(); + pThinFlexVerts[nThinFlexVertexCount+i].m_Normal.InitZero(); + } + nThinFlexVertexCount = g_fn_ComputeFlexedVertexWrinkle_StreamOffset[nMorphPath](nThinFlexVertexCount,pThinFlexVerts,pFirstThinFlexIndex,pvanim,nCurrentTag, numVertsToProcess, weights.f4); + } +#ifdef _DEBUG + else // Validation path inactive in release, since these static arrays consume 1MB + { + bool repeat = false; + static CachedPosNorm_t backupThinFlexVerts[MAXSTUDIOFLEXVERTS+1], checkThinFlexVerts[MAXSTUDIOFLEXVERTS+1]; + static CacheIndex_t backupFirstThinFlexIndex[MAXSTUDIOVERTS+1],checkFirstThinFlexIndex[MAXSTUDIOVERTS+1]; + int newThinFlexVertexCount ; + static int numRuns = 0; + ++numRuns; + memcpy(backupThinFlexVerts, m_pThinFlexVerts, sizeof(m_pThinFlexVerts)); + memcpy(backupFirstThinFlexIndex, m_pThinFlexIndex, sizeof(m_pThinFlexIndex)); + do + { + for(uint32 i = 0; i < 2; ++i) // reset the first 2 positions here as it's required by the algorithm.. 
+ { + pThinFlexVerts[nThinFlexVertexCount+i].m_Position.InitZero(); + pThinFlexVerts[nThinFlexVertexCount+i].m_Normal.InitZero(); + } + + newThinFlexVertexCount = g_fn_ComputeFlexedVertexWrinkle_StreamOffset[nMorphPath](nThinFlexVertexCount,pThinFlexVerts,pFirstThinFlexIndex,pvanim,nCurrentTag, numVertsToProcess, weights.f4); + memcpy(checkThinFlexVerts, m_pThinFlexVerts, sizeof(m_pThinFlexVerts)); + memcpy(checkFirstThinFlexIndex, m_pThinFlexIndex, sizeof(m_pThinFlexIndex)); + memcpy(m_pThinFlexVerts, backupThinFlexVerts, sizeof(m_pThinFlexVerts)); + memcpy(m_pThinFlexIndex, backupFirstThinFlexIndex, sizeof(m_pThinFlexIndex)); + + ComputeFlexedVertex_StreamOffset( pStudioHdr, pflex, pvanim, vertCount, w1, w2, w3, w4); + AlwaysAssert(m_ThinFlexVertexCount == newThinFlexVertexCount); + for(int i = 0; i < newThinFlexVertexCount; ++i) + AlwaysAssert(Diff(checkThinFlexVerts[i], m_pThinFlexVerts[i]) < 1e-5f); + int indexOffset = m_pFirstThinFlexIndex - m_pThinFlexIndex; + for(int i = 0; i < numVertsToProcess; ++i) + AlwaysAssert(*(int*)&checkFirstThinFlexIndex[indexOffset + pvanim[i].index] == *(int*)&m_pThinFlexIndex[indexOffset + pvanim[i].index]); + + if(repeat) + { + m_ThinFlexVertexCount = nThinFlexVertexCount; + memcpy(m_pThinFlexVerts, backupThinFlexVerts, sizeof(m_pThinFlexVerts)); + memcpy(m_pThinFlexIndex, backupFirstThinFlexIndex, sizeof(m_pThinFlexIndex)); + } + } + while(repeat); + nThinFlexVertexCount = newThinFlexVertexCount; + } +#endif + m_ThinFlexVertexCount = nThinFlexVertexCount; + } + } + else +#endif + { + ComputeFlexedVertex_StreamOffset( pStudioHdr, pflex, pvanim, vertCount, w1, w2, w3, w4); + } +} + +#endif // PLATFORM_WINDOWS
\ No newline at end of file diff --git a/studiorender/r_studioflex.cpp b/studiorender/r_studioflex.cpp new file mode 100644 index 0000000..0aea6b1 --- /dev/null +++ b/studiorender/r_studioflex.cpp @@ -0,0 +1,928 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $Workfile: $ +// $Date: $ +// $NoKeywords: $ +//===========================================================================// + +#include "studio.h" +#include "studiorendercontext.h" +#include "bitmap/imageformat.h" +#include "materialsystem/imaterialsystem.h" +#include "materialsystem/imaterial.h" +#include "materialsystem/imaterialvar.h" +#include "materialsystem/itexture.h" +#include "materialsystem/imesh.h" +#include "mathlib/mathlib.h" +#include "studiorender.h" +#include "pixelwriter.h" +#include "vtf/vtf.h" +#include "tier1/convar.h" +#include "tier1/KeyValues.h" +#include "tier0/vprof.h" + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + +#define sign( a ) (((a) < 0) ? -1 : (((a) > 0) ? 
1 : 0 )) + +void CStudioRender::R_StudioEyeballPosition( const mstudioeyeball_t *peyeball, eyeballstate_t *pstate ) +{ + // Vector forward; + // Vector org, right, up; + + pstate->peyeball = peyeball; + + Vector tmp; + // move eyeball into worldspace + { + // ConDMsg("%.2f %.2f %.2f\n", peyeball->org[0], peyeball->org[1], peyeball->org[2] ); + + VectorCopy( peyeball->org, tmp ); + + tmp[0] += m_pRC->m_Config.fEyeShiftX * sign( tmp[0] ); + tmp[1] += m_pRC->m_Config.fEyeShiftY * sign( tmp[1] ); + tmp[2] += m_pRC->m_Config.fEyeShiftZ * sign( tmp[2] ); + } + VectorTransform( tmp, m_pBoneToWorld[peyeball->bone], pstate->org ); + VectorRotate( peyeball->up, m_pBoneToWorld[peyeball->bone], pstate->up ); + + // look directly at target + VectorSubtract( m_pRC->m_ViewTarget, pstate->org, pstate->forward ); + VectorNormalize( pstate->forward ); + + if ( !m_pRC->m_Config.bEyeMove ) + { + VectorRotate( peyeball->forward, m_pBoneToWorld[peyeball->bone], pstate->forward ); + VectorScale( pstate->forward, -1 ,pstate->forward ); // ??? 
+ } + + CrossProduct( pstate->forward, pstate->up, pstate->right ); + VectorNormalize( pstate->right ); + + // shift N degrees off of the target + float dz; + dz = peyeball->zoffset; + + VectorMA( pstate->forward, peyeball->zoffset + dz, pstate->right, pstate->forward ); + +#if 0 + // add random jitter + VectorMA( forward, RandomFloat( -0.02, 0.02 ), right, forward ); + VectorMA( forward, RandomFloat( -0.02, 0.02 ), up, forward ); +#endif + + VectorNormalize( pstate->forward ); + // re-aim eyes + CrossProduct( pstate->forward, pstate->up, pstate->right ); + VectorNormalize( pstate->right ); + + CrossProduct( pstate->right, pstate->forward, pstate->up ); + VectorNormalize( pstate->up ); + + float scale = (1.0 / peyeball->iris_scale) + m_pRC->m_Config.fEyeSize; + + if (scale > 0) + scale = 1.0 / scale; + + VectorScale( &pstate->right[0], -scale, pstate->mat[0] ); + VectorScale( &pstate->up[0], -scale, pstate->mat[1] ); + + pstate->mat[0][3] = -DotProduct( &pstate->org[0], pstate->mat[0] ) + 0.5f; + pstate->mat[1][3] = -DotProduct( &pstate->org[0], pstate->mat[1] ) + 0.5f; + + // FIXME: push out vertices for cornea +} + + +//----------------------------------------------------------------------------- +// +//----------------------------------------------------------------------------- +void CStudioRender::R_StudioEyelidFACS( const mstudioeyeball_t *peyeball, const eyeballstate_t *pstate ) +{ + if ( peyeball->m_bNonFACS ) + return; + + Vector headup; + Vector headforward; + Vector pos; + + float upperlid = DEG2RAD( 9.5 ); + float lowerlid = DEG2RAD( -26.4 ); + + // FIXME: Crash workaround + Vector vecNormTarget; + vecNormTarget.Init( peyeball->uppertarget[0], peyeball->uppertarget[1], peyeball->uppertarget[2] ); + vecNormTarget /= peyeball->radius; + vecNormTarget.x = clamp( vecNormTarget.x, -1.0f, 1.0f ); + vecNormTarget.y = clamp( vecNormTarget.y, -1.0f, 1.0f ); + vecNormTarget.z = clamp( vecNormTarget.z, -1.0f, 1.0f ); + + // get weighted position of eyeball angles 
based on the "raiser", "neutral", and "lowerer" controls + upperlid = m_pFlexWeights[peyeball->upperflexdesc[0]] * asin( vecNormTarget.x ); + upperlid += m_pFlexWeights[peyeball->upperflexdesc[1]] * asin( vecNormTarget.y ); + upperlid += m_pFlexWeights[peyeball->upperflexdesc[2]] * asin( vecNormTarget.z ); + + vecNormTarget.Init( peyeball->lowertarget[0], peyeball->lowertarget[1], peyeball->lowertarget[2] ); + vecNormTarget /= peyeball->radius; + vecNormTarget.x = clamp( vecNormTarget.x, -1.0f, 1.0f ); + vecNormTarget.y = clamp( vecNormTarget.y, -1.0f, 1.0f ); + vecNormTarget.z = clamp( vecNormTarget.z, -1.0f, 1.0f ); + + lowerlid = m_pFlexWeights[peyeball->lowerflexdesc[0]] * asin( vecNormTarget.x ); + lowerlid += m_pFlexWeights[peyeball->lowerflexdesc[1]] * asin( vecNormTarget.y ); + lowerlid += m_pFlexWeights[peyeball->lowerflexdesc[2]] * asin( vecNormTarget.z ); + + // ConDMsg("%.1f %.1f\n", RAD2DEG( upperlid ), RAD2DEG( lowerlid ) ); + + float sinupper, cosupper, sinlower, coslower; + SinCos( upperlid, &sinupper, &cosupper ); + SinCos( lowerlid, &sinlower, &coslower ); + + // convert to head relative space + VectorIRotate( pstate->up, m_pBoneToWorld[peyeball->bone], headup ); + VectorIRotate( pstate->forward, m_pBoneToWorld[peyeball->bone], headforward ); + + // upper lid + VectorScale( headup, sinupper * peyeball->radius, pos ); + VectorMA( pos, cosupper * peyeball->radius, headforward, pos ); + m_pFlexWeights[peyeball->upperlidflexdesc] = DotProduct( pos, peyeball->up ); + + // lower lid + VectorScale( headup, sinlower * peyeball->radius, pos ); + VectorMA( pos, coslower * peyeball->radius, headforward, pos ); + m_pFlexWeights[peyeball->lowerlidflexdesc] = DotProduct( pos, peyeball->up ); + // ConDMsg("%.4f %.4f\n", m_pRC->m_FlexWeights[peyeball->upperlidflex], m_pRC->m_FlexWeights[peyeball->lowerlidflex] ); +} + + +void CStudioRender::MaterialPlanerProjection( const matrix3x4_t& mat, int count, const Vector *psrcverts, Vector2D *pdesttexcoords ) +{ + for 
(int i = 0; i < count; i++) + { + pdesttexcoords[i][0] = DotProduct( &psrcverts[i].x, mat[0] ) + mat[0][3]; + pdesttexcoords[i][1] = DotProduct( &psrcverts[i].x, mat[1] ) + mat[1][3]; + } +} + + +//----------------------------------------------------------------------------- +// Ramp and clamp the flex weight +//----------------------------------------------------------------------------- +float CStudioRender::RampFlexWeight( mstudioflex_t &flex, float w ) +{ + if (w <= flex.target0 || w >= flex.target3) + { + // value outside of range + w = 0.0; + } + else if (w < flex.target1) + { + // 0 to 1 ramp + w = (w - flex.target0) / (flex.target1 - flex.target0); + } + else if (w > flex.target2) + { + // 1 to 0 ramp + w = (flex.target3 - w) / (flex.target3 - flex.target2); + } + else + { + // plat + w = 1.0; + } + return w; +} + +//----------------------------------------------------------------------------- +// Setup the flex verts for this rendering +//----------------------------------------------------------------------------- +void CStudioRender::R_StudioFlexVerts( mstudiomesh_t *pmesh, int lod ) +{ + VPROF_BUDGET( "CStudioRender::R_StudioFlexVerts", VPROF_BUDGETGROUP_MODEL_RENDERING ); + + Assert( pmesh ); + + const float flVertAnimFixedPointScale = m_pStudioHdr->VertAnimFixedPointScale(); + + // There's a chance we can actually do the flex twice on a single mesh + // since there's flexed HW + SW portions of the mesh. + if (m_VertexCache.IsFlexComputationDone()) + return; + + // get pointers to geometry + if ( !pmesh->pModel()->CacheVertexData( m_pStudioHdr ) ) + { + // not available yet + return; + } + const mstudio_meshvertexdata_t *vertData = pmesh->GetVertexData( m_pStudioHdr ); + Assert( vertData ); + if ( !vertData ) + { + static unsigned int warnCount = 0; + if ( warnCount++ < 20 ) + Warning( "ERROR: R_StudioFlexVerts, model verts have been compressed, cannot render! 
(use \"-no_compressed_vvds\")" ); + return; + } + + // The flex data should have been converted to the new (fixed-point) format on load: + Assert( m_pStudioHdr->flags & STUDIOHDR_FLAGS_FLEXES_CONVERTED ); + if ( ( m_pStudioHdr->flags & STUDIOHDR_FLAGS_FLEXES_CONVERTED ) == 0 ) + { + static unsigned int flexConversionTimesWarned = 0; + if ( flexConversionTimesWarned++ < 6 ) + Warning( "ERROR: flex verts have not been converted (queued loader refcount bug?) - expect to see 'exploded' faces" ); + } + + + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + Vector4D *pStudioTangentS; + if ( vertData->HasTangentData() ) + { + pStudioTangentS = vertData->TangentS( 0 ); + } + else + { + pStudioTangentS = NULL; + } + + mstudioflex_t *pflex = pmesh->pFlex( 0 ); + + m_VertexCache.SetupComputation( pmesh, true ); + + // apply flex weights + int i, j, n; + + for (i = 0; i < pmesh->numflexes; i++) + { + float w1 = RampFlexWeight( pflex[i], m_pFlexWeights[ pflex[i].flexdesc ] ); + float w2 = RampFlexWeight( pflex[i], m_pFlexDelayedWeights[ pflex[i].flexdesc ] ); + + float w3, w4; + if ( pflex[i].flexpair != 0) + { + w3 = RampFlexWeight( pflex[i], m_pFlexWeights[ pflex[i].flexpair ] ); + w4 = RampFlexWeight( pflex[i], m_pFlexDelayedWeights[ pflex[i].flexpair ] ); + } + else + { + w3 = w1; + w4 = w2; + } + + if ( w1 > -0.001 && w1 < 0.001 && w2 > -0.001 && w2 < 0.001 ) + { + if ( w3 > -0.001 && w3 < 0.001 && w4 > -0.001 && w4 < 0.001 ) + { + continue; + } + } + + // We may have wrinkle information for this flex, but if we're software skinning + // we're going to ignore it. 
+ byte *pvanim = pflex[i].pBaseVertanim(); + int nVAnimSizeBytes = pflex[i].VertAnimSizeBytes(); + + for (j = 0; j < pflex[i].numverts; j++) + { + mstudiovertanim_t *pAnim = (mstudiovertanim_t*)( pvanim + j * nVAnimSizeBytes ); + n = pAnim->index; + + // Only flex the indices that are (still) part of this mesh + // need lod restriction here + if (n < pmesh->vertexdata.numLODVertexes[lod]) + { + mstudiovertex_t &vert = pVertices[n]; + + CachedPosNormTan_t* pFlexedVertex; + if (!m_VertexCache.IsVertexFlexed(n)) + { + // Add a new flexed vert to the flexed vertex list + pFlexedVertex = m_VertexCache.CreateFlexVertex(n); + // skip processing if no more flexed verts can be allocated + if (pFlexedVertex == NULL) + continue; + + VectorCopy( vert.m_vecPosition, pFlexedVertex->m_Position ); + VectorCopy( vert.m_vecNormal, pFlexedVertex->m_Normal ); + + if (pStudioTangentS) + { + Vector4DCopy( pStudioTangentS[n], pFlexedVertex->m_TangentS ); + Assert( pFlexedVertex->m_TangentS.w == -1.0f || pFlexedVertex->m_TangentS.w == 1.0f ); + } + } + else + { + pFlexedVertex = m_VertexCache.GetFlexVertex(n); + } + + float s = pAnim->speed * (1.0F/255.0F); + float b = pAnim->side * (1.0F/255.0F); + + float w = (w1 * s + (1.0f - s) * w2) * (1.0f - b) + b * (w3 * s + (1.0f - s) * w4); + + // Accumulate weighted deltas + pFlexedVertex->m_Position += pAnim->GetDeltaFixed( flVertAnimFixedPointScale ) * w; + pFlexedVertex->m_Normal += pAnim->GetNDeltaFixed( flVertAnimFixedPointScale ) * w; + + if ( pStudioTangentS ) + { + pFlexedVertex->m_TangentS.AsVector3D() += pAnim->GetNDeltaFixed( flVertAnimFixedPointScale ) * w; + Assert( pFlexedVertex->m_TangentS.w == -1.0f || pFlexedVertex->m_TangentS.w == 1.0f ); + } + } + } + } + + m_VertexCache.RenormalizeFlexVertices( vertData->HasTangentData() ); +} + +// REMOVED!! Look in version 32 if you need it. 
+//static void R_StudioEyeballNormals( const mstudioeyeball_t *peyeball, int count, const Vector *psrcverts, Vector *pdestnorms ) + +#define KERNEL_DIAMETER 2 +#define KERNEL_TEXELS (KERNEL_DIAMETER) +#define KERNEL_TEXEL_RADIUS (KERNEL_TEXELS / 2) + +inline float GlintGaussSpotCoefficient( float dx, float dy /*, float *table */ ) +{ + const float radius = KERNEL_DIAMETER / 2; + const float rsq = 1.0f / (radius * radius); + float r2 = (dx * dx + dy * dy) * rsq; + if (r2 <= 1.0f) + { + return exp( -25.0 * r2 ); + // NOTE: This optimization doesn't make much of a difference + //int index = r2 * (GLINT_TABLE_ENTRIES-1); + //return table[index]; + } + + return 0; +} + +void CStudioRender::AddGlint( CPixelWriter &pixelWriter, float x, float y, const Vector& color ) +{ + x = (x + 0.5f) * m_GlintWidth; + y = (y + 0.5f) * m_GlintHeight; + const float texelRadius = KERNEL_DIAMETER / 2; + + int x0 = (int)x; + int y0 = (int)y; + int x1 = x0 + texelRadius; + int y1 = y0 + texelRadius; + x0 -= texelRadius; + y0 -= texelRadius; + + // clip light to texture + if ( (x0 >= m_GlintWidth) || (x1 < 0) || (y0 >= m_GlintHeight) || (y1 < 0) ) + return; + + // clamp coordinates + if ( x0 < 0 ) + { + x0 = 0; + } + if ( y0 < 0 ) + { + y0 = 0; + } + if ( x1 >= m_GlintWidth ) + { + x1 = m_GlintWidth-1; + } + if ( y1 >= m_GlintHeight ) + { + y1 = m_GlintHeight-1; + } + + for (int v = y0; v <= y1; ++v ) + { + pixelWriter.Seek( x0, v ); + + for (int u = x0; u <= x1; ++u ) + { + float fu = ((float)u) - x; + float fv = ((float)v) - y; + const float offset = 0.25; + float intensity = GlintGaussSpotCoefficient( fu-offset, fv-offset ) + + GlintGaussSpotCoefficient( fu+offset, fv-offset ) + + 5 * GlintGaussSpotCoefficient( fu, fv ) + + GlintGaussSpotCoefficient( fu-offset, fv+offset ) + + GlintGaussSpotCoefficient( fu+offset, fv+offset ); + + // NOTE: Old filter code multiplies the signal by 8X, so we will too + intensity *= (4.0f/9.0f); + + // NOTE: It's much faster to do the work in the dest texture 
than to touch the memory more + // or make more buffers + Vector outColor = intensity * color; + int r, g, b, a; + pixelWriter.ReadPixelNoAdvance( r, g, b, a ); + outColor.x += TextureToLinear(r); + outColor.y += TextureToLinear(g); + outColor.z += TextureToLinear(b); + pixelWriter.WritePixel( LinearToTexture(outColor.x), LinearToTexture(outColor.y), LinearToTexture(outColor.z) ); + } + } +} + + +//----------------------------------------------------------------------------- +// glint +//----------------------------------------------------------------------------- + +// test/stub code +#if 0 +class CEmptyTextureRegen : public ITextureRegenerator +{ +public: + virtual void RegenerateTextureBits( ITexture *pTexture, IVTFTexture *pVTFTexture, Rect_t *pRect ) + { + // get the texture + unsigned char *pTextureData = pVTFTexture->ImageData( 0, 0, 0 ); + int nImageSize = pVTFTexture->ComputeMipSize( 0 ); + memset( pTextureData, 0, nImageSize ); + } + + // We've got a global instance, no need to delete it + virtual void Release() {} +}; +static CEmptyTextureRegen s_GlintTextureRegen; +#endif + +class CGlintTextureRegenerator : public ITextureRegenerator +{ +public: + virtual void RegenerateTextureBits( ITexture *pTexture, IVTFTexture *pVTFTexture, Rect_t *pRect ) + { + // We don't need to reconstitute the bits after a task switch + // since we reconstitute them every frame they are used anyways + if ( !m_pStudioRender ) + return; + + if ( ( m_pStudioRender->m_GlintWidth != pVTFTexture->Width() ) || + ( m_pStudioRender->m_GlintHeight != pVTFTexture->Height() ) ) + { + m_pStudioRender->m_GlintWidth = pVTFTexture->Width(); + m_pStudioRender->m_GlintHeight = pVTFTexture->Height(); + } + + CStudioRender::GlintRenderData_t pRenderData[16]; + int nGlintCount = m_pStudioRender->BuildGlintRenderData( pRenderData, + ARRAYSIZE(pRenderData), m_pState, *m_pVRight, *m_pVUp, *m_pROrigin ); + + // setup glint texture + unsigned char *pTextureData = pVTFTexture->ImageData( 0, 0, 0 ); + 
CPixelWriter pixelWriter; + pixelWriter.SetPixelMemory( pVTFTexture->Format(), pTextureData, pVTFTexture->RowSizeInBytes( 0 ) ); + int nImageSize = pVTFTexture->ComputeMipSize( 0 ); + memset( pTextureData, 0, nImageSize ); + + // Put in glints due to the lights in the scene + for ( int i = 0; i < nGlintCount; ++i ) + { + // NOTE: AddGlint is a more expensive solution but it looks better close-up + m_pStudioRender->AddGlint( pixelWriter, pRenderData[i].m_vecPosition[0], + pRenderData[i].m_vecPosition[1], pRenderData[i].m_vecIntensity ); + } + } + + // We've got a global instance, no need to delete it + virtual void Release() {} + + const eyeballstate_t *m_pState; + const Vector *m_pVRight; + const Vector *m_pVUp; + const Vector *m_pROrigin; + CStudioRender *m_pStudioRender; +}; + +static CGlintTextureRegenerator s_GlintTextureRegen; + +static ITexture *s_pProcGlint = NULL; +void CStudioRender::PrecacheGlint() +{ + if ( !m_pGlintTexture ) + { + // Begin block in which all render targets should be allocated + g_pMaterialSystem->BeginRenderTargetAllocation(); + + // Get the texture that we are going to be updating procedurally. + m_pGlintTexture = g_pMaterialSystem->CreateNamedRenderTargetTextureEx2( + "_rt_eyeglint", 32, 32, RT_SIZE_NO_CHANGE, IMAGE_FORMAT_BGRA8888, MATERIAL_RT_DEPTH_NONE ); + m_pGlintTexture->IncrementReferenceCount(); + + // Begin block in which all render targets should be allocated + g_pMaterialSystem->EndRenderTargetAllocation(); + + if ( !IsX360() ) + { + // Get the texture that we are going to be updating procedurally. + s_pProcGlint = g_pMaterialSystem->CreateProceduralTexture( + "proc_eyeglint", TEXTURE_GROUP_MODEL, 32, 32, IMAGE_FORMAT_BGRA8888, TEXTUREFLAGS_NOMIP|TEXTUREFLAGS_NOLOD ); + s_pProcGlint->SetTextureRegenerator( &s_GlintTextureRegen ); + } + + // JAY: I don't see this pattern in the code often. It looks like the material system + // would rather than I deal exclusively with IMaterials instead. 
+ // So maybe we should bake the LOD texture into the eyes shader. + // For now, just hardcode one + // UNDONE: Add a $lodtexture to the eyes shader. Maybe add a $lodsize too. + // UNDONE: Make eyes texture load $lodtexture and switch to that here instead of black + m_pGlintLODTexture = g_pMaterialSystem->FindTexture( IsX360() ? "black" : "vgui/black", NULL, false ); + m_pGlintLODTexture->IncrementReferenceCount(); + } +} + +void CStudioRender::UncacheGlint() +{ + if ( m_pGlintTexture ) + { + if ( s_pProcGlint ) + { + s_pProcGlint->SetTextureRegenerator( NULL ); + s_pProcGlint->DecrementReferenceCount(); + s_pProcGlint = NULL; + } + m_pGlintTexture->DecrementReferenceCount(); + m_pGlintTexture = NULL; + m_pGlintLODTexture->DecrementReferenceCount(); + m_pGlintLODTexture = NULL; + } +} + +int CStudioRender::BuildGlintRenderData( GlintRenderData_t *pData, int nMaxGlints, + const eyeballstate_t *pState, const Vector& vright, const Vector& vup, const Vector& r_origin ) +{ + // NOTE: See version 25 for lots of #if 0ed out stuff I removed + Vector viewdelta; + VectorSubtract( r_origin, pState->org, viewdelta ); + VectorNormalize( viewdelta ); + + // hack cornea position + float iris_radius = pState->peyeball->radius * (6.0 / 12.0); + float cornea_radius = pState->peyeball->radius * (8.0 / 12.0); + + Vector cornea; + // position on eyeball that matches iris radius + float er = ( iris_radius / pState->peyeball->radius ); + er = FastSqrt( 1 - er * er ); + + // position on cornea sphere that matches iris radius + float cr = ( iris_radius / cornea_radius ); + cr = FastSqrt( 1 - cr * cr ); + + float r = ( er * pState->peyeball->radius - cr * cornea_radius ); + VectorScale( pState->forward, r, cornea ); + + // get offset for center of cornea + float dx, dy; + dx = DotProduct( vright, cornea ); + dy = DotProduct( vup, cornea ); + + // move cornea to world space + VectorAdd( cornea, pState->org, cornea ); + + Vector delta, intensity; + Vector reflection, coord; + + // Put in 
glints due to the lights in the scene + int nGlintCount = 0; + for ( int i = 0; R_LightGlintPosition( i, cornea, delta, intensity ); ++i ) + { + VectorNormalize( delta ); + if ( DotProduct( delta, pState->forward ) <= 0 ) + continue; + + VectorAdd( delta, viewdelta, reflection ); + VectorNormalize( reflection ); + + pData[nGlintCount].m_vecPosition[0] = dx + cornea_radius * DotProduct( vright, reflection ); + pData[nGlintCount].m_vecPosition[1] = dy + cornea_radius * DotProduct( vup, reflection ); + pData[nGlintCount].m_vecIntensity = intensity; + if ( ++nGlintCount >= nMaxGlints ) + return nMaxGlints; + + if ( !R_LightGlintPosition( i, pState->org, delta, intensity ) ) + continue; + + VectorNormalize( delta ); + if ( DotProduct( delta, pState->forward ) >= er ) + continue; + + pData[nGlintCount].m_vecPosition[0] = pState->peyeball->radius * DotProduct( vright, reflection ); + pData[nGlintCount].m_vecPosition[1] = pState->peyeball->radius * DotProduct( vup, reflection ); + pData[nGlintCount].m_vecIntensity = intensity; + if ( ++nGlintCount >= nMaxGlints ) + return nMaxGlints; + } + return nGlintCount; +} + + +//----------------------------------------------------------------------------- +// Renders a glint texture procedurally +//----------------------------------------------------------------------------- +ITexture* CStudioRender::RenderGlintTexture( const eyeballstate_t *pState, + const Vector& vright, const Vector& vup, const Vector& r_origin ) +{ + GlintRenderData_t pRenderData[16]; + int nGlintCount = BuildGlintRenderData( pRenderData, ARRAYSIZE(pRenderData), + pState, vright, vup, r_origin ); + + if ( nGlintCount == 0 ) + return m_pGlintLODTexture; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + pRenderContext->PushRenderTargetAndViewport( m_pGlintTexture ); + + IMaterial *pPrevMaterial = pRenderContext->GetCurrentMaterial(); + void *pPrevProxy = pRenderContext->GetCurrentProxy(); + int nPrevBoneCount = pRenderContext->GetCurrentNumBones(); + 
MaterialHeightClipMode_t nPrevClipMode = pRenderContext->GetHeightClipMode( ); + bool bPrevClippingEnabled = pRenderContext->EnableClipping( false ); + bool bInFlashlightMode = pRenderContext->GetFlashlightMode(); + + if ( bInFlashlightMode ) + { + DisableScissor(); + } + pRenderContext->ClearColor4ub( 0, 0, 0, 0 ); + pRenderContext->ClearBuffers( true, false, false ); + + pRenderContext->SetFlashlightMode( false ); + pRenderContext->SetHeightClipMode( MATERIAL_HEIGHTCLIPMODE_DISABLE ); + pRenderContext->SetNumBoneWeights( 0 ); + pRenderContext->Bind( m_pGlintBuildMaterial ); + + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->PushMatrix(); + pRenderContext->LoadIdentity(); + + pRenderContext->MatrixMode( MATERIAL_VIEW ); + pRenderContext->PushMatrix(); + pRenderContext->LoadIdentity(); + + pRenderContext->MatrixMode( MATERIAL_PROJECTION ); + pRenderContext->PushMatrix(); + pRenderContext->LoadIdentity(); + + CMeshBuilder meshBuilder; + IMesh *pMesh = pRenderContext->GetDynamicMesh( ); + meshBuilder.Begin( pMesh, MATERIAL_TRIANGLES, nGlintCount * 4, nGlintCount * 6 ); + + const float epsilon = 0.5f / 32.0f; + int nIndex = 0; + for ( int i = 0; i < nGlintCount; ++i ) + { + const GlintRenderData_t &glint = pRenderData[i]; + + // Position of glint 0..31 range + float x = (glint.m_vecPosition.x + 0.5f) * m_GlintWidth; + float y = (glint.m_vecPosition.y + 0.5f) * m_GlintHeight; + Vector vGlintCenter = Vector( x, y, 0.0f ); + float ooWidth = 1.0f / (float)m_GlintWidth; + float ooHeight = 1.0f / (float)m_GlintHeight; + + int x0 = floor(x); + int y0 = floor(y); + int x1 = x0 + 1.0f; + int y1 = y0 + 1.0f; + x0 -= 2.0f; // Fill rules make us pad this out more than the procedural version + y0 -= 2.0f; + + float screenX0 = x0 * 2 * ooWidth + epsilon - 1; + float screenX1 = x1 * 2 * ooWidth + epsilon - 1; + float screenY0 = -(y0 * 2 * ooHeight + epsilon - 1); + float screenY1 = -(y1 * 2 * ooHeight + epsilon - 1); + + meshBuilder.Position3f( screenX0, screenY0, 
0.0f ); + meshBuilder.TexCoord2f( 0, x0, y0 ); + meshBuilder.TexCoord2fv( 1, vGlintCenter.Base() ); + meshBuilder.TexCoord3fv( 2, glint.m_vecIntensity.Base() ); + meshBuilder.AdvanceVertex(); + + meshBuilder.Position3f( screenX1, screenY0, 0.0f ); + meshBuilder.TexCoord2f( 0, x1, y0 ); + meshBuilder.TexCoord2fv( 1, vGlintCenter.Base() ); + meshBuilder.TexCoord3fv( 2, glint.m_vecIntensity.Base() ); + meshBuilder.AdvanceVertex(); + + meshBuilder.Position3f( screenX1, screenY1, 0.0f ); + meshBuilder.TexCoord2f( 0, x1, y1 ); + meshBuilder.TexCoord2fv( 1, vGlintCenter.Base() ); + meshBuilder.TexCoord3fv( 2, glint.m_vecIntensity.Base() ); + meshBuilder.AdvanceVertex(); + + meshBuilder.Position3f( screenX0, screenY1, 0.0f ); + meshBuilder.TexCoord2f( 0, x0, y1 ); + meshBuilder.TexCoord2fv( 1, vGlintCenter.Base() ); + meshBuilder.TexCoord3fv( 2, glint.m_vecIntensity.Base() ); + meshBuilder.AdvanceVertex(); + + meshBuilder.FastIndex( nIndex ); + meshBuilder.FastIndex( nIndex+1 ); + meshBuilder.FastIndex( nIndex+2 ); + meshBuilder.FastIndex( nIndex ); + meshBuilder.FastIndex( nIndex+2 ); + meshBuilder.FastIndex( nIndex+3 ); + nIndex += 4; + } + + meshBuilder.End(); + pMesh->Draw(); + + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->PopMatrix(); + + pRenderContext->MatrixMode( MATERIAL_VIEW ); + pRenderContext->PopMatrix(); + + pRenderContext->MatrixMode( MATERIAL_PROJECTION ); + pRenderContext->PopMatrix(); + + if ( IsX360() ) + { + pRenderContext->CopyRenderTargetToTextureEx( m_pGlintTexture, 0, NULL, NULL ); + } + + pRenderContext->PopRenderTargetAndViewport( ); + + pRenderContext->Bind( pPrevMaterial, pPrevProxy ); + pRenderContext->SetNumBoneWeights( nPrevBoneCount ); + pRenderContext->SetHeightClipMode( nPrevClipMode ); + pRenderContext->EnableClipping( bPrevClippingEnabled ); + pRenderContext->SetFlashlightMode( bInFlashlightMode ); + + return m_pGlintTexture; +} + +static ConVar r_glint_procedural( "r_glint_procedural", "0" ); +static ConVar 
r_glint_alwaysdraw( "r_glint_alwaysdraw", "0" ); + +void CStudioRender::R_StudioEyeballGlint( const eyeballstate_t *pstate, IMaterialVar *pGlintVar, + const Vector& vright, const Vector& vup, const Vector& r_origin ) +{ + // Kick off a PIX event, since this process encompasses a bunch of locks etc... + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + PIXEVENT( pRenderContext, "GenerateEyeballGlint" ); + + // Don't do a procedural glint texture if there are enough pixels covered by the eyeball onscreen, + // and the eye isn't backfaced. + if ( m_pGlintLODTexture && r_glint_alwaysdraw.GetInt() == 0 ) + { + // backfaced or too small to bother? + float pixelArea = pRenderContext->ComputePixelWidthOfSphere( pstate->org, pstate->peyeball->radius ); + if( + // FIXME: this backface doesn't work for something that isn't a plane. + // DotProduct( pstate->forward, m_ViewPlaneNormal ) > 0.0f || + pixelArea < m_pRC->m_Config.fEyeGlintPixelWidthLODThreshold ) + { + // use black glint texture + pGlintVar->SetTextureValue( m_pGlintLODTexture ); + return; + } + } + + // Legacy method for DX8 + if ( !IsX360() && ( r_glint_procedural.GetInt() || g_pMaterialSystemHardwareConfig->GetDXSupportLevel() < 90 ) ) + { + // Set up the texture regenerator + s_GlintTextureRegen.m_pVRight = &vright; + s_GlintTextureRegen.m_pVUp = &vup; + s_GlintTextureRegen.m_pROrigin = &r_origin; + s_GlintTextureRegen.m_pState = pstate; + s_GlintTextureRegen.m_pStudioRender = this; + + // This will cause the glint texture to be re-generated and then downloaded + s_pProcGlint->Download( ); + + // This is necessary to make sure we don't reconstitute the bits + // after coming back from a task switch + s_GlintTextureRegen.m_pStudioRender = NULL; + + // Use the normal glint instead of the black glint + pGlintVar->SetTextureValue( s_pProcGlint ); + } + else // Queued hardware version + { + // Make sure we know the correct size of the glint texture + m_GlintWidth = m_pGlintTexture->GetActualWidth(); + 
m_GlintHeight = m_pGlintTexture->GetActualHeight(); + + // Render glint render target + ITexture *pUseGlintTexture = RenderGlintTexture( pstate, vright, vup, r_origin ); + + // Use the normal glint instead of the black glint + pGlintVar->SetTextureValue( pUseGlintTexture ); + } +} + +void CStudioRender::ComputeGlintTextureProjection( eyeballstate_t const* pState, + const Vector& vright, const Vector& vup, matrix3x4_t& mat ) +{ + // project eyeball into screenspace texture + float scale = 1.0 / (pState->peyeball->radius * 2); + VectorScale( &vright.x, scale, mat[0] ); + VectorScale( &vup.x, scale, mat[1] ); + + mat[0][3] = -DotProduct( pState->org.Base(), mat[0] ) + 0.5; + mat[1][3] = -DotProduct( pState->org.Base(), mat[1] ) + 0.5; +} + + +/* +void R_MouthLighting( int count, const Vector *psrcverts, const Vector *psrcnorms, Vector4D *pdestlightvalues ) +{ + Vector forward; + + if (m_pStudioHdr->nummouths < 1) return; + + mstudiomouth_t *pMouth = r_pstudiohdr->pMouth( 0 ); // FIXME: this needs to get the mouth index from the shader + + float fIllum = m_FlexWeights[pMouth->flexdesc]; + if (fIllum < 0) fIllum = 0; + if (fIllum > 1) fIllum = 1; + fIllum = LinearToTexture( fIllum ) / 255.0; + + + VectorRotate( pMouth->forward, g_StudioInternalState.boneToWorld[ pMouth->bone ], forward ); + + for (int i = 0; i < count; i++) + { + float dot = -DotProduct( psrcnorms[i], forward ); + if (dot > 0) + { + dot = LinearToTexture( dot ) / 255.0; // FIXME: this isn't robust + VectorScale( pdestlightvalues[i], dot, pdestlightvalues[i] ); + } + else + VectorFill( pdestlightvalues[i], 0 ); + + VectorScale( pdestlightvalues[i], fIllum, pdestlightvalues[i] ); + } +} +*/ + +void CStudioRender::R_MouthComputeLightingValues( float& fIllum, Vector& forward ) +{ + // FIXME: this needs to get the mouth index from the shader + mstudiomouth_t *pMouth = m_pStudioHdr->pMouth( 0 ); + + fIllum = m_pFlexWeights[pMouth->flexdesc]; + if (fIllum < 0) fIllum = 0; + if (fIllum > 1) fIllum = 1; + fIllum 
= LinearToTexture( fIllum ) / 255.0; + + VectorRotate( pMouth->forward, m_pBoneToWorld[ pMouth->bone ], forward ); +} + +void CStudioRender::R_MouthLighting( float fIllum, const Vector& normal, const Vector& forward, Vector &light ) +{ + float dot = -DotProduct( normal, forward ); + if (dot > 0) + { + VectorScale( light, dot * fIllum, light ); + } + else + { + VectorFill( light, 0 ); + } +} + +static unsigned int illumVarCache = 0; +static unsigned int forwardVarCache = 0; +void CStudioRender::R_MouthSetupVertexShader( IMaterial* pMaterial ) +{ + if (!pMaterial) + return; + + // FIXME: this needs to get the mouth index from the shader + mstudiomouth_t *pMouth = m_pStudioHdr->pMouth( 0 ); + + // Don't deal with illum gamma, we apply it at a different point + // for vertex shaders + float fIllum = m_pFlexWeights[pMouth->flexdesc]; + if (fIllum < 0) fIllum = 0; + if (fIllum > 1) fIllum = 1; + + Vector forward; + VectorRotate( pMouth->forward, m_pBoneToWorld[ pMouth->bone ], forward ); + forward *= -1; + + IMaterialVar* pIllumVar = pMaterial->FindVarFast( "$illumfactor", &illumVarCache ); + if (pIllumVar) + { + pIllumVar->SetFloatValue( fIllum ); + } + + IMaterialVar* pFowardVar = pMaterial->FindVarFast( "$forward", &forwardVarCache ); + if (pFowardVar) + { + pFowardVar->SetVecValue( forward.Base(), 3 ); + } +} diff --git a/studiorender/r_studiogettriangles.cpp b/studiorender/r_studiogettriangles.cpp new file mode 100644 index 0000000..afb96d4 --- /dev/null +++ b/studiorender/r_studiogettriangles.cpp @@ -0,0 +1,166 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//===========================================================================// + +#include "studiorendercontext.h" +#include "optimize.h" +#include "tier0/vprof.h" + +// memdbgon must be the last include file in a .cpp file!!! 
+#include "tier0/memdbgon.h" + +void CStudioRenderContext::GetTriangles( const DrawModelInfo_t& info, matrix3x4_t *pBoneToWorld, GetTriangles_Output_t &out ) +{ + VPROF( "CStudioRender::GetTriangles"); + + out.m_MaterialBatches.RemoveAll(); // clear out data. + + if( !info.m_pStudioHdr || !info.m_pHardwareData || + !info.m_pHardwareData->m_NumLODs || !info.m_pHardwareData->m_pLODs ) + { + return; + } + + int lod = info.m_Lod; + int lastlod = info.m_pHardwareData->m_NumLODs - 1; + + if ( lod == USESHADOWLOD ) + { + lod = lastlod; + } + else + { + lod = clamp( lod, 0, lastlod ); + } + + // clamp to root lod + if ( lod < info.m_pHardwareData->m_RootLOD) + { + lod = info.m_pHardwareData->m_RootLOD; + } + + int nSkin = info.m_Skin; + if ( nSkin >= info.m_pStudioHdr->numskinfamilies ) + { + nSkin = 0; + } + short *pSkinRef = info.m_pStudioHdr->pSkinref( nSkin * info.m_pStudioHdr->numskinref ); + + studiomeshdata_t *pStudioMeshes = info.m_pHardwareData->m_pLODs[lod].m_pMeshData; + IMaterial **ppMaterials = info.m_pHardwareData->m_pLODs[lod].ppMaterials; + + // Bone to world must be set before calling this function; it uses it here + int boneMask = BONE_USED_BY_VERTEX_AT_LOD(lod); + ComputePoseToWorld( out.m_PoseToWorld, info.m_pStudioHdr, boneMask, m_RC.m_ViewOrigin, pBoneToWorld ); + + int i; + for (i=0 ; i < info.m_pStudioHdr->numbodyparts ; i++) + { + mstudiomodel_t *pModel = NULL; + R_StudioSetupModel( i, info.m_Body, &pModel, info.m_pStudioHdr ); + + // Iterate over all the meshes.... 
each mesh is a new material + int k; + for ( k = 0; k < pModel->nummeshes; ++k ) + { + GetTriangles_MaterialBatch_t &materialBatch = out.m_MaterialBatches[out.m_MaterialBatches.AddToTail()]; + mstudiomesh_t *pMesh = pModel->pMesh(k); + + if ( !pModel->CacheVertexData( info.m_pStudioHdr ) ) + { + // not available yet + continue; + } + const mstudio_meshvertexdata_t *vertData = pMesh->GetVertexData( info.m_pStudioHdr ); + Assert( vertData ); // This can only return NULL on X360 for now + + // add the verts from this mesh to the materialBatch + materialBatch.m_Verts.SetCount( pMesh->numvertices ); + for ( int vertID = 0; vertID < pMesh->numvertices; vertID++ ) + { + GetTriangles_Vertex_t& vert = materialBatch.m_Verts[vertID]; + + vert.m_Position = *vertData->Position( vertID ); + vert.m_Normal = *vertData->Normal( vertID ); + vert.m_TexCoord = *vertData->Texcoord( vertID ); + + if (vertData->HasTangentData()) + { + vert.m_TangentS = *vertData->TangentS( vertID ); + } +#if _DEBUG + else + { + // ensure any unintended access faults + vert.m_TangentS.Init( VEC_T_NAN, VEC_T_NAN, VEC_T_NAN, VEC_T_NAN ); + } +#endif + vert.m_NumBones = vertData->BoneWeights( vertID )->numbones; + int j; + for ( j = 0; j < vert.m_NumBones; j++ ) + { + vert.m_BoneWeight[j] = vertData->BoneWeights( vertID )->weight[j]; + vert.m_BoneIndex[j] = vertData->BoneWeights( vertID )->bone[j]; + } + } + + IMaterial *pMaterial = ppMaterials[pSkinRef[pMesh->material]]; + Assert( pMaterial ); + materialBatch.m_pMaterial = pMaterial; + studiomeshdata_t *pMeshData = &pStudioMeshes[pMesh->meshid]; + if ( pMeshData->m_NumGroup == 0 ) + continue; + + // Clear out indices + materialBatch.m_TriListIndices.SetCount( 0 ); + + // Iterate over all stripgroups + int stripGroupID; + for ( stripGroupID = 0; stripGroupID < pMeshData->m_NumGroup; stripGroupID++ ) + { + studiomeshgroup_t *pMeshGroup = &pMeshData->m_pMeshGroup[stripGroupID]; +// bool bIsFlexed = ( pMeshGroup->m_Flags & MESHGROUP_IS_FLEXED ) != 0; +// bool 
bIsHWSkinned = ( pMeshGroup->m_Flags & MESHGROUP_IS_HWSKINNED ) != 0; + + // Iterate over all strips. . . each strip potentially changes bones states. + int stripID; + for ( stripID = 0; stripID < pMeshGroup->m_NumStrips; stripID++ ) + { + OptimizedModel::StripHeader_t *pStripData = &pMeshGroup->m_pStripData[stripID]; +// int boneID; +// for( boneID = 0; boneID < pStripData->numBoneStateChanges; boneID++ ) +// { +// OptimizedModel::BoneStateChangeHeader_t *pBoneStateChange = pStripData->pBoneStateChange( boneID ); +// hardwareBoneToGlobalBone[pBoneStateChange->hardwareID] = pBoneStateChange->newBoneID; +// } + if ( pStripData->flags & OptimizedModel::STRIP_IS_TRILIST ) + { + for ( int i = 0; i < pStripData->numIndices; i += 3 ) + { + int idx = pStripData->indexOffset + i; + materialBatch.m_TriListIndices.AddToTail( pMeshGroup->MeshIndex( idx ) ); + materialBatch.m_TriListIndices.AddToTail( pMeshGroup->MeshIndex( idx + 1 ) ); + materialBatch.m_TriListIndices.AddToTail( pMeshGroup->MeshIndex( idx + 2 ) ); + } + } + else + { + Assert( pStripData->flags & OptimizedModel::STRIP_IS_TRISTRIP ); + for (int i = 0; i < pStripData->numIndices - 2; ++i) + { + int idx = pStripData->indexOffset + i; + bool ccw = (i & 0x1) == 0; + materialBatch.m_TriListIndices.AddToTail( pMeshGroup->MeshIndex( idx ) ); + materialBatch.m_TriListIndices.AddToTail( pMeshGroup->MeshIndex( idx + 1 + ccw ) ); + materialBatch.m_TriListIndices.AddToTail( pMeshGroup->MeshIndex( idx + 2 - ccw ) ); + } + } + } + } + } + } +} diff --git a/studiorender/r_studiolight.cpp b/studiorender/r_studiolight.cpp new file mode 100644 index 0000000..fefc06e --- /dev/null +++ b/studiorender/r_studiolight.cpp @@ -0,0 +1,542 @@ +//========= Copyright Valve Corporation, All rights reserved. 
============// +// +// Purpose: +// +// $Workfile: $ +// $Date: $ +// $NoKeywords: $ +//===========================================================================// + +#include "r_studiolight.h" +#include "studiorender.h" +#include "studiorendercontext.h" +#include "studio.h" +#include "materialsystem/imaterialsystemhardwareconfig.h" +#include "mathlib/vector.h" +#include "mathlib/mathlib.h" +#include <float.h> + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + +void R_WorldLightDelta( const LightDesc_t *wl, const Vector& org, Vector& delta ); + + +//----------------------------------------------------------------------------- +// Copies lighting state +//----------------------------------------------------------------------------- +int CopyLocalLightingState( int nMaxLights, LightDesc_t *pDest, int nLightCount, const LightDesc_t *pSrc ) +{ + // ensure we write within array bounds + if ( nLightCount > nMaxLights ) + { + nLightCount = nMaxLights; + } + + for( int i = 0; i < nLightCount; i++ ) + { + LightDesc_t *pLight = &pDest[i]; + memcpy( pLight, &pSrc[i], sizeof( LightDesc_t ) ); + pLight->m_Flags = 0; + if( pLight->m_Attenuation0 != 0.0f ) + { + pLight->m_Flags |= LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0; + } + if( pLight->m_Attenuation1 != 0.0f ) + { + pLight->m_Flags |= LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1; + } + if( pLight->m_Attenuation2 != 0.0f ) + { + pLight->m_Flags |= LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2; + } + } + + return nLightCount; +} + + +//----------------------------------------------------------------------------- +// Computes the ambient term +//----------------------------------------------------------------------------- +void R_LightAmbient_4D( const Vector& normal, Vector4D* pLightBoxColor, Vector &lv ) +{ + VectorScale( normal[0] > 0.f ? 
pLightBoxColor[0].AsVector3D() : pLightBoxColor[1].AsVector3D(), normal[0]*normal[0], lv ); + VectorMA( lv, normal[1]*normal[1], normal[1] > 0.f ? pLightBoxColor[2].AsVector3D() : pLightBoxColor[3].AsVector3D(), lv ); + VectorMA( lv, normal[2]*normal[2], normal[2] > 0.f ? pLightBoxColor[4].AsVector3D() : pLightBoxColor[5].AsVector3D(), lv ); +} + +#if defined( _WIN32 ) && !defined( _X360 ) +void R_LightAmbient_4D( const FourVectors& normal, Vector4D* pLightBoxColor, FourVectors &lv ) +{ +// VPROF( "R_LightAmbient" ); + + // !!speed!! compute ambient color cube in sse format + static fltx4 FourZeros={0.,0.,0.,.0}; + + // find the contributions from each axis + fltx4 NegMask=CmpLtSIMD(normal.x,FourZeros); + fltx4 ColorSelect0=ReplicateX4(pLightBoxColor[0].AsVector3D().x); + fltx4 ColorSelect1=ReplicateX4(pLightBoxColor[1].AsVector3D().x); + fltx4 DirectionalColor=OrSIMD(AndSIMD(ColorSelect1,NegMask),AndNotSIMD(NegMask,ColorSelect0)); + fltx4 NormCompSquared=MulSIMD(normal.x,normal.x); + lv.x=MulSIMD(DirectionalColor,NormCompSquared); + ColorSelect0=ReplicateX4(pLightBoxColor[0].AsVector3D().y); + ColorSelect1=ReplicateX4(pLightBoxColor[1].AsVector3D().y); + DirectionalColor=OrSIMD(AndSIMD(ColorSelect1,NegMask),AndNotSIMD(NegMask,ColorSelect0)); + lv.y=MulSIMD(DirectionalColor,NormCompSquared); + ColorSelect0=ReplicateX4(pLightBoxColor[0].AsVector3D().z); + ColorSelect1=ReplicateX4(pLightBoxColor[1].AsVector3D().z); + DirectionalColor=OrSIMD(AndSIMD(ColorSelect1,NegMask),AndNotSIMD(NegMask,ColorSelect0)); + lv.z=MulSIMD(DirectionalColor,NormCompSquared); + + NegMask=CmpLtSIMD(normal.y,FourZeros); + ColorSelect0=ReplicateX4(pLightBoxColor[2].AsVector3D().x); + ColorSelect1=ReplicateX4(pLightBoxColor[3].AsVector3D().x); + DirectionalColor=OrSIMD(AndSIMD(ColorSelect1,NegMask),AndNotSIMD(NegMask,ColorSelect0)); + NormCompSquared=MulSIMD(normal.y,normal.y); + lv.x=AddSIMD(lv.x,MulSIMD(DirectionalColor,NormCompSquared)); + 
ColorSelect0=ReplicateX4(pLightBoxColor[2].AsVector3D().y); + ColorSelect1=ReplicateX4(pLightBoxColor[3].AsVector3D().y); + DirectionalColor=OrSIMD(AndSIMD(ColorSelect1,NegMask),AndNotSIMD(NegMask,ColorSelect0)); + lv.y=AddSIMD(lv.y,MulSIMD(DirectionalColor,NormCompSquared)); + ColorSelect0=ReplicateX4(pLightBoxColor[2].AsVector3D().z); + ColorSelect1=ReplicateX4(pLightBoxColor[3].AsVector3D().z); + DirectionalColor=OrSIMD(AndSIMD(ColorSelect1,NegMask),AndNotSIMD(NegMask,ColorSelect0)); + lv.z=AddSIMD(lv.z,MulSIMD(DirectionalColor,NormCompSquared)); + + NegMask=CmpLtSIMD(normal.z,FourZeros); + ColorSelect0=ReplicateX4(pLightBoxColor[4].AsVector3D().x); + ColorSelect1=ReplicateX4(pLightBoxColor[5].AsVector3D().x); + DirectionalColor=OrSIMD(AndSIMD(ColorSelect1,NegMask),AndNotSIMD(NegMask,ColorSelect0)); + NormCompSquared=MulSIMD(normal.z,normal.z); + lv.x=AddSIMD(lv.x,MulSIMD(DirectionalColor,NormCompSquared)); + ColorSelect0=ReplicateX4(pLightBoxColor[4].AsVector3D().y); + ColorSelect1=ReplicateX4(pLightBoxColor[5].AsVector3D().y); + DirectionalColor=OrSIMD(AndSIMD(ColorSelect1,NegMask),AndNotSIMD(NegMask,ColorSelect0)); + lv.y=AddSIMD(lv.y,MulSIMD(DirectionalColor,NormCompSquared)); + ColorSelect0=ReplicateX4(pLightBoxColor[4].AsVector3D().z); + ColorSelect1=ReplicateX4(pLightBoxColor[5].AsVector3D().z); + DirectionalColor=OrSIMD(AndSIMD(ColorSelect1,NegMask),AndNotSIMD(NegMask,ColorSelect0)); + lv.z=AddSIMD(lv.z,MulSIMD(DirectionalColor,NormCompSquared)); +} +#endif + + +//----------------------------------------------------------------------------- +// Computes the ambient term, parameters are 3D Vectors for optimization +//----------------------------------------------------------------------------- +void R_LightAmbient_3D( const Vector& normal, const Vector* pLightBoxColor, Vector &lv ) +{ + VectorScale( normal[0] > 0.f ? pLightBoxColor[0] : pLightBoxColor[1], normal[0]*normal[0], lv ); + VectorMA( lv, normal[1]*normal[1], normal[1] > 0.f ? 
pLightBoxColor[2] : pLightBoxColor[3], lv ); + VectorMA( lv, normal[2]*normal[2], normal[2] > 0.f ? pLightBoxColor[4] : pLightBoxColor[5], lv ); +} + + +//----------------------------------------------------------------------------- +// Set up light[i].dot, light[i].falloff, and light[i].delta for all lights given +// a vertex position "vert". +//----------------------------------------------------------------------------- +void R_LightStrengthWorld( const Vector& vert, int lightcount, LightDesc_t* pDesc, lightpos_t *light ) +{ +// VPROF( "R_LightStrengthWorld" ); + + // NJS: note to self, maybe switch here based on lightcount, so multiple squareroots can be done simeltaneously? + for ( int i = 0; i < lightcount; i++) + { + R_WorldLightDelta( &pDesc[i], vert, light[i].delta ); + light[i].falloff = R_WorldLightDistanceFalloff( &pDesc[i], light[i].delta ); + + VectorNormalizeFast( light[i].delta ); + light[i].dot = DotProduct( light[i].delta, pDesc[i].m_Direction ); + } +} + + +//----------------------------------------------------------------------------- +// Calculate the delta between a light and position +//----------------------------------------------------------------------------- +void R_WorldLightDelta( const LightDesc_t *wl, const Vector& org, Vector& delta ) +{ + switch (wl->m_Type) + { + case MATERIAL_LIGHT_POINT: + case MATERIAL_LIGHT_SPOT: + VectorSubtract( wl->m_Position, org, delta ); + break; + + case MATERIAL_LIGHT_DIRECTIONAL: + VectorMultiply( wl->m_Direction, -1, delta ); + break; + + default: + // Bug: need to return an error + Assert( 0 ); + break; + } +} + + +//#define NO_AMBIENT_CUBE 1 + +// TODO: cone clipping calc's wont work for boxlight since the player asks for a single point. Not sure what the volume is. 
+TEMPLATE_FUNCTION_TABLE( void, R_LightEffectsWorldFunctionTable, ( const LightDesc_t* pLightDesc, const lightpos_t *light, const Vector& normal, Vector &dest ), 256 ) +{ + enum + { + LightType1 = ( nArgument & 0xC0 ) >> 6, + LightType2 = ( nArgument & 0x30 ) >> 4, + LightType3 = ( nArgument & 0x0C ) >> 2, + LightType4 = ( nArgument & 0x03 ) + }; + + // VPROF( "R_LightEffectsWorld" ); + + #ifdef NO_AMBIENT_CUBE + dest[0] = dest[1] = dest[2] = 0.0f; + #endif + + // FIXME: lighting effects for normal and position are independent! + // FIXME: these can be pre-calculated per normal + if( (LightType_t)LightType1 != MATERIAL_LIGHT_DISABLE ) + { + float ratio = light[0].falloff * CWorldLightAngleWrapper<LightType1>::WorldLightAngle( &pLightDesc[0], pLightDesc[0].m_Direction, normal, light[0].delta ); + if (ratio > 0) + { + const float* pColor = (float*)&pLightDesc[0].m_Color; + dest[0] += pColor[0] * ratio; + dest[1] += pColor[1] * ratio; + dest[2] += pColor[2] * ratio; + } + } + + if( (LightType_t)LightType2 != MATERIAL_LIGHT_DISABLE ) + { + float ratio = light[1].falloff * CWorldLightAngleWrapper<LightType2>::WorldLightAngle( &pLightDesc[1], pLightDesc[1].m_Direction, normal, light[1].delta ); + if (ratio > 0) + { + const float* pColor = (float*)&pLightDesc[1].m_Color; + dest[0] += pColor[0] * ratio; + dest[1] += pColor[1] * ratio; + dest[2] += pColor[2] * ratio; + } + } + + if( (LightType_t)LightType3 != MATERIAL_LIGHT_DISABLE ) + { + float ratio = light[2].falloff * CWorldLightAngleWrapper<LightType3>::WorldLightAngle( &pLightDesc[2], pLightDesc[2].m_Direction, normal, light[2].delta ); + if (ratio > 0) + { + const float* pColor = (float*)&pLightDesc[2].m_Color; + dest[0] += pColor[0] * ratio; + dest[1] += pColor[1] * ratio; + dest[2] += pColor[2] * ratio; + } + } + + if( (LightType_t)LightType4 != MATERIAL_LIGHT_DISABLE ) + { + float ratio = light[3].falloff * CWorldLightAngleWrapper<LightType4>::WorldLightAngle( &pLightDesc[3], pLightDesc[3].m_Direction, normal, 
light[3].delta ); + if (ratio > 0) + { + const float* pColor = (float*)&pLightDesc[3].m_Color; + dest[0] += pColor[0] * ratio; + dest[1] += pColor[1] * ratio; + dest[2] += pColor[2] * ratio; + } + } +} + +TEMPLATE_FUNCTION_TABLE( void, R_LightEffectsWorldFunctionTableConstDirectional, ( const LightDesc_t* pLightDesc, const lightpos_t *light, const Vector& normal, Vector &dest, float flDirectionalConstant ), 256 ) +{ + enum + { + LightType1 = ( nArgument & 0xC0 ) >> 6, + LightType2 = ( nArgument & 0x30 ) >> 4, + LightType3 = ( nArgument & 0x0C ) >> 2, + LightType4 = ( nArgument & 0x03 ) + }; + + // VPROF( "R_LightEffectsWorld" ); + +#ifdef NO_AMBIENT_CUBE + dest[0] = dest[1] = dest[2] = 0.0f; +#endif + + // FIXME: lighting effects for normal and position are independent! + // FIXME: these can be pre-calculated per normal + if( (LightType_t)LightType1 != MATERIAL_LIGHT_DISABLE ) + { + float ratio = light[0].falloff * + CWorldLightAngleWrapperConstDirectional<LightType1>::WorldLightAngle( &pLightDesc[0], + pLightDesc[0].m_Direction, normal, light[0].delta, flDirectionalConstant ); + if (ratio > 0) + { + const float* pColor = (float*)&pLightDesc[0].m_Color; + dest[0] += pColor[0] * ratio; + dest[1] += pColor[1] * ratio; + dest[2] += pColor[2] * ratio; + } + } + + if( (LightType_t)LightType2 != MATERIAL_LIGHT_DISABLE ) + { + float ratio = light[1].falloff * + CWorldLightAngleWrapperConstDirectional<LightType2>::WorldLightAngle( &pLightDesc[1], + pLightDesc[1].m_Direction, normal, light[1].delta, flDirectionalConstant ); + + if (ratio > 0) + { + const float* pColor = (float*)&pLightDesc[1].m_Color; + dest[0] += pColor[0] * ratio; + dest[1] += pColor[1] * ratio; + dest[2] += pColor[2] * ratio; + } + } + + if( (LightType_t)LightType3 != MATERIAL_LIGHT_DISABLE ) + { + float ratio = light[2].falloff * + CWorldLightAngleWrapperConstDirectional<LightType3>::WorldLightAngle( &pLightDesc[2], + pLightDesc[2].m_Direction, normal, light[2].delta, flDirectionalConstant ); + + if 
(ratio > 0) + { + const float* pColor = (float*)&pLightDesc[2].m_Color; + dest[0] += pColor[0] * ratio; + dest[1] += pColor[1] * ratio; + dest[2] += pColor[2] * ratio; + } + } + + if( (LightType_t)LightType4 != MATERIAL_LIGHT_DISABLE ) + { + float ratio = light[3].falloff * + CWorldLightAngleWrapperConstDirectional<LightType4>::WorldLightAngle( &pLightDesc[3], + pLightDesc[3].m_Direction, normal, light[3].delta, flDirectionalConstant ); + + if (ratio > 0) + { + const float* pColor = (float*)&pLightDesc[3].m_Color; + dest[0] += pColor[0] * ratio; + dest[1] += pColor[1] * ratio; + dest[2] += pColor[2] * ratio; + } + } +} + + +//----------------------------------------------------------------------------- +// Get the function table index +//----------------------------------------------------------------------------- +static int s_pLightMask[ 5 ] = +{ + 0, // No lights + 0xC0, // 1 light + 0xF0, // 2 lights + 0xFC, // 3 lights + 0xFF, // 4 lights +}; + +inline int R_LightEffectsWorldIndex(const LightDesc_t* pLightDesc, int nNumLights) +{ + if ( nNumLights > 4 ) + { + nNumLights = 4; + } + + int nIndex = ((pLightDesc[0].m_Type & 0x3) << 6) | ((pLightDesc[1].m_Type & 0x3) << 4) | ( (pLightDesc[2].m_Type & 0x3) << 2) | (pLightDesc[3].m_Type & 0x3); + nIndex &= s_pLightMask[ nNumLights ]; + + Assert( nIndex >= 0 && nIndex < R_LightEffectsWorldFunctionTable::count ); + return nIndex; +} + + +/* + light_direction (light_pos - vertex_pos) +*/ +// TODO: move cone calcs to position +// TODO: cone clipping calc's wont work for boxlight since the player asks for a single point. Not sure what the volume is. 
+TEMPLATE_FUNCTION_TABLE( float, R_WorldLightDistanceFalloffFunctionTable, ( const LightDesc_t *wl, const Vector& delta ), 8) +{ + Assert( nArgument != 0 ); + + float dist2 = DotProduct( delta, delta ); + + // Cull out light beyond this radius + if (wl->m_Range != 0.f) + { + if (dist2 > wl->m_Range * wl->m_Range) + return 0.0f; + } + + // The general purpose equation: + float fTotal = FLT_EPSILON; + + if( nArgument & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 ) + { + fTotal = wl->m_Attenuation0; + } + + if( nArgument & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 ) + { + fTotal += wl->m_Attenuation1 * FastSqrt( dist2 ); + } + + if( nArgument & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 ) + { + fTotal += wl->m_Attenuation2 * dist2; + } + + return 1.0f / fTotal; +} + +//----------------------------------------------------------------------------- +// Calculate the falloff from the world lights +//----------------------------------------------------------------------------- +float FASTCALL R_WorldLightDistanceFalloff( const LightDesc_t *wl, const Vector& delta ) +{ + // Ensure no invalid flags are set + Assert( ! ( wl->m_Flags & ~(LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0|LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1|LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2|LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED) ) ); + + // calculate falloff + int flags = wl->m_Flags & (LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0|LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1|LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2); + return R_WorldLightDistanceFalloffFunctionTable::functions[flags](wl, delta); +} + +#if defined( _WIN32 ) && !defined( _X360 ) +fltx4 FASTCALL R_WorldLightDistanceFalloff( const LightDesc_t *wl, const FourVectors &delta ) +{ + // !!speed!!: lights could store m_Attenuation2,m_Attenuation1, and m_Range^2 copies in replicated SSE format. + + // Ensure no invalid flags are set + Assert( ! 
( wl->m_Flags & ~(LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0|LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1|LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2|LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED) ) ); + + fltx4 dist2 = delta*delta; + + fltx4 fTotal; + + if( wl->m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 ) + { + fTotal = ReplicateX4(wl->m_Attenuation0); + } + else + fTotal= ReplicateX4(FLT_EPSILON); // !!speed!! replicate + + if( wl->m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 ) + { + fTotal=AddSIMD(fTotal,MulSIMD(ReplicateX4(wl->m_Attenuation1),SqrtEstSIMD(dist2))); + } + + if( wl->m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 ) + { + fTotal=AddSIMD(fTotal,MulSIMD(ReplicateX4(wl->m_Attenuation2),dist2)); + } + + fTotal=ReciprocalEstSIMD(fTotal); + // Cull out light beyond this radius + // now, zero out elements for which dist2 was > range^2. !!speed!! lights should store dist^2 in sse format + if (wl->m_Range != 0.f) + { + fltx4 RangeSquared = ReplicateX4(wl->m_Range*wl->m_Range); // !!speed!! 
+ fTotal=AndSIMD(fTotal,CmpLtSIMD(dist2,RangeSquared)); + } + return fTotal; +} +#endif + + +int CStudioRender::R_LightGlintPosition( int index, const Vector& org, Vector& delta, Vector& intensity ) +{ + if (index >= m_pRC->m_NumLocalLights) + return false; + + R_WorldLightDelta( &m_pRC->m_LocalLights[index], org, delta ); + float falloff = R_WorldLightDistanceFalloff( &m_pRC->m_LocalLights[index], delta ); + + VectorMultiply( m_pRC->m_LocalLights[index].m_Color, falloff, intensity ); + return true; +} + + +//----------------------------------------------------------------------------- +// Setup up the function table +//----------------------------------------------------------------------------- +void CStudioRender::R_InitLightEffectsWorld3() +{ + // set the function pointer + int index = R_LightEffectsWorldIndex( m_pRC->m_LocalLights, m_pRC->m_NumLocalLights ); + R_LightEffectsWorld3 = R_LightEffectsWorldFunctionTable::functions[index]; +} + + +//----------------------------------------------------------------------------- +// Performs lighting functions common to the ComputeLighting and ComputeLightingConstantDirectional +// returns the index of the LightEffectsWorldFunction to use +//----------------------------------------------------------------------------- +static int ComputeLightingCommon( const Vector* pAmbient, int lightCount, + LightDesc_t* pLights, const Vector& pt, const Vector& normal, lightpos_t *pLightPos, Vector& lighting ) +{ + // Set up lightpos[i].dot, lightpos[i].falloff, and lightpos[i].delta for all lights + R_LightStrengthWorld( pt, lightCount, pLights, pLightPos ); + + // calculate ambient values from the ambient cube given a normal. 
+ R_LightAmbient_3D( normal, pAmbient, lighting ); + + return R_LightEffectsWorldIndex( pLights, lightCount ); +} + + +//----------------------------------------------------------------------------- +// Compute the lighting at a point and normal +// Final Lighting is in linear space +//----------------------------------------------------------------------------- +void CStudioRenderContext::ComputeLighting( const Vector* pAmbient, int lightCount, + LightDesc_t* pLights, const Vector& pt, const Vector& normal, Vector& lighting ) +{ + if ( m_RC.m_Config.fullbright ) + { + lighting.Init( 1.0f, 1.0f, 1.0f ); + return; + } + + if ( lightCount > ARRAYSIZE( m_pLightPos ) ) + { + AssertMsg( 0, "Light count out of range in ComputeLighting\n" ); + lightCount = ARRAYSIZE( m_pLightPos ); + } + + // Calculate color given lightpos_t lightpos, a normal, and the ambient + // color from the ambient cube calculated in ComputeLightingCommon + int index = ComputeLightingCommon( pAmbient, lightCount, pLights, pt, normal, m_pLightPos, lighting ); + R_LightEffectsWorldFunctionTable::functions[index]( pLights, m_pLightPos, normal, lighting ); +} + + +//----------------------------------------------------------------------------- +// Compute the lighting at a point and normal +// Final Lighting is in linear space +// Uses flDirectionalAmount instead of directional components of lights +//----------------------------------------------------------------------------- +void CStudioRenderContext::ComputeLightingConstDirectional( const Vector* pAmbient, int lightCount, + LightDesc_t* pLights, const Vector& pt, const Vector& normal, Vector& lighting, float flDirectionalAmount ) +{ + if ( m_RC.m_Config.fullbright ) + { + lighting.Init( 1.0f, 1.0f, 1.0f ); + return; + } + + if ( lightCount > ARRAYSIZE( m_pLightPos ) ) + { + AssertMsg( 0, "Light count out of range in ComputeLighting\n" ); + lightCount = ARRAYSIZE( m_pLightPos ); + } + + // Calculate color given lightpos_t lightpos, a normal, and the 
ambient + // color from the ambient cube calculated in ComputeLightingCommon + int index = ComputeLightingCommon( pAmbient, lightCount, pLights, pt, normal, m_pLightPos, lighting ); + R_LightEffectsWorldFunctionTableConstDirectional::functions[index]( pLights, m_pLightPos, normal, lighting, flDirectionalAmount ); +}
\ No newline at end of file diff --git a/studiorender/r_studiolight.h b/studiorender/r_studiolight.h new file mode 100644 index 0000000..9a984b9 --- /dev/null +++ b/studiorender/r_studiolight.h @@ -0,0 +1,49 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: Stateless light computation routines +// +//===========================================================================// + +#ifndef R_STUDIOLIGHT_H +#define R_STUDIOLIGHT_H +#ifdef _WIN32 +#pragma once +#endif + + +#include "tier0/platform.h" + +#if defined( _WIN32 ) && !defined( _X360 ) +#include <xmmintrin.h> +#endif + + +//----------------------------------------------------------------------------- +// Forward declarations +//----------------------------------------------------------------------------- +class Vector; +class Vector4D; +class FourVectors; +struct lightpos_t; +struct LightDesc_t; + + +//----------------------------------------------------------------------------- +// Stateless light computation routines +//----------------------------------------------------------------------------- + +// Computes the ambient term +void R_LightAmbient_4D( const Vector& normal, Vector4D* pLightBoxColor, Vector &lv ); +void R_LightStrengthWorld( const Vector& vert, int lightcount, LightDesc_t* pLightDesc, lightpos_t *light ); +float FASTCALL R_WorldLightDistanceFalloff( const LightDesc_t *wl, const Vector& delta ); + +// Copies lighting state into a buffer, returns number of lights copied +int CopyLocalLightingState( int nMaxLights, LightDesc_t *pDest, int nLightCount, const LightDesc_t *pSrc ); + +#if defined( _WIN32 ) && !defined( _X360 ) +// SSE optimized versions +void R_LightAmbient_4D( const FourVectors& normal, Vector4D* pLightBoxColor, FourVectors &lv ); +__m128 FASTCALL R_WorldLightDistanceFalloff( const LightDesc_t *wl, const FourVectors& delta ); +#endif + +#endif // R_STUDIOLIGHT_H diff --git a/studiorender/r_studiostats.cpp 
b/studiorender/r_studiostats.cpp new file mode 100644 index 0000000..ad24192 --- /dev/null +++ b/studiorender/r_studiostats.cpp @@ -0,0 +1,389 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//===========================================================================// + +#include "studiorender.h" +#include "studiorendercontext.h" +#include "materialsystem/imaterialsystem.h" +#include "materialsystem/imaterialsystemhardwareconfig.h" +#include "materialsystem/imaterial.h" +#include "materialsystem/imaterialvar.h" +#include "materialsystem/imesh.h" +#include "optimize.h" +#include "mathlib/vmatrix.h" +#include "tier0/vprof.h" +#include "tier1/strtools.h" +#include "tier1/KeyValues.h" +#include "tier0/memalloc.h" +#include "convar.h" +#include "materialsystem/itexture.h" +#include "tier2/tier2.h" + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + + +static ConVar r_studio_stats( "r_studio_stats", "0", FCVAR_CHEAT ); + +inline float TriangleArea( const Vector &v0, const Vector &v1, const Vector &v2 ) +{ + Vector vecEdge0, vecEdge1, vecCross; + VectorSubtract( v1, v0, vecEdge0 ); + VectorSubtract( v2, v0, vecEdge1 ); + CrossProduct( vecEdge0, vecEdge1, vecCross ); + return ( VectorLength( vecCross ) * 0.5f ); +} + + +void CStudioRender::R_GatherStats( studiomeshgroup_t *pGroup, CMeshBuilder &MeshBuilder, IMesh *pMesh, IMaterial *pMaterial ) +{ + int nCount = 0; + float flSurfaceArea = 0.0f; + float flTriSurfaceArea = 0.0f; + float flTextureSurfaceArea = 0.0f; + int nFrontFacing = 0; + CUtlVector< Vector > Positions; + CUtlVector< Vector2D > TextureCoords; + CUtlVector< short > Indexes; + CUtlVector< float > TriAreas; + CUtlVector< float > TextureAreas; + int nTextureWidth = 0; + int nTextureHeight = 0; + IMaterialVar **pMaterialVar = pMaterial->GetShaderParams(); + + for( int i = 0; i < pMaterial->ShaderParamCount(); i++ ) + { + if ( pMaterialVar[ i ]->IsTexture() == false 
) + { + continue; + } + + ITexture *pTexture = pMaterialVar[ i ]->GetTextureValue(); + if ( pTexture == NULL ) + { + continue; + } + + int nWidth = pTexture->GetActualWidth(); + if ( nWidth > nTextureWidth ) + { + nTextureWidth = nWidth; + } + int nHeight = pTexture->GetActualHeight(); + if ( nHeight > nTextureHeight ) + { + nTextureHeight = nHeight; + } + } + + Vector2D vTextureSize( nTextureWidth, nTextureHeight ); + + VMatrix m_ViewMatrix, m_ProjectionMatrix, m_ViewProjectionMatrix; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + pRenderContext->GetMatrix( MATERIAL_VIEW, &m_ViewMatrix ); + pRenderContext->GetMatrix( MATERIAL_PROJECTION, &m_ProjectionMatrix ); + MatrixMultiply( m_ProjectionMatrix, m_ViewMatrix, m_ViewProjectionMatrix ); + + Positions.EnsureCapacity( MeshBuilder.VertexCount() ); + Positions.SetCount( MeshBuilder.VertexCount() ); + TextureCoords.EnsureCapacity( MeshBuilder.VertexCount() ); + TextureCoords.SetCount( MeshBuilder.VertexCount() ); + for( int i = 0; i < MeshBuilder.VertexCount(); i++ ) + { + MeshBuilder.SelectVertex( i ); + Positions[ i ] = *( const Vector * )MeshBuilder.Position(); + TextureCoords[ i ] = ( *( const Vector2D * )MeshBuilder.TexCoord( 0 ) ) * vTextureSize; + } + + int nNumIndexes = 0; + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t *pStrip = &pGroup->m_pStripData[ j ]; + nNumIndexes += pStrip->numIndices; + } + + Indexes.EnsureCapacity( nNumIndexes ); + Indexes.SetCount( nNumIndexes ); + TriAreas.EnsureCapacity( nNumIndexes / 3 ); + TriAreas.SetCount( nNumIndexes / 3 ); + TextureAreas.EnsureCapacity( nNumIndexes / 3 ); + TextureAreas.SetCount( nNumIndexes / 3 ); + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t *pStrip = &pGroup->m_pStripData[ j ]; + for( int i = 0; i < pStrip->numIndices; i += 3 ) + { + Indexes[ pStrip->indexOffset + i ] = pGroup->m_pIndices[ pStrip->indexOffset + i ]; + Indexes[ pStrip->indexOffset + i + 1 ] = 
pGroup->m_pIndices[ pStrip->indexOffset + i + 1 ]; + Indexes[ pStrip->indexOffset + i + 2 ] = pGroup->m_pIndices[ pStrip->indexOffset + i + 2 ]; + TriAreas[ ( pStrip->indexOffset + i ) / 3 ] = 0.0f; + TextureAreas[ ( pStrip->indexOffset + i ) / 3 ] = 0.0f; + } + } + + const float UNIFORM_SCREEN_WIDTH = 1600.0f; + const float UNIFORM_SCREEN_HEIGHT = 1200.0f; + + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t *pStrip = &pGroup->m_pStripData[ j ]; + + for( int i = 0; i < pStrip->numIndices; i += 3 ) + { + int nIndex1 = pGroup->m_pIndices[ pStrip->indexOffset + i ]; + int nIndex2 = pGroup->m_pIndices[ pStrip->indexOffset + i + 1 ]; + int nIndex3 = pGroup->m_pIndices[ pStrip->indexOffset + i + 2 ]; + + MeshBuilder.SelectVertex( nIndex1 ); + const float *pPos1 = MeshBuilder.Position(); + + MeshBuilder.SelectVertex( nIndex2 ); + const float *pPos2 = MeshBuilder.Position(); + + MeshBuilder.SelectVertex( nIndex3 ); + const float *pPos3 = MeshBuilder.Position(); + + float flTriArea = TriangleArea( *( const Vector * )( pPos1 ), *( const Vector * )( pPos2 ), *( const Vector * )( pPos3 ) ); + flSurfaceArea += flTriArea; + + Vector V1View, V2View, V3View; + + m_ViewProjectionMatrix.V3Mul( *( const Vector * )( pPos1 ), V1View ); + m_ViewProjectionMatrix.V3Mul( *( const Vector * )( pPos2 ), V2View ); + m_ViewProjectionMatrix.V3Mul( *( const Vector * )( pPos3 ), V3View ); + + Vector vNormal; + float flIntercept; + ComputeTrianglePlane( V1View, V2View, V3View, vNormal, flIntercept ); + + V1View = ( V1View * 0.5f ) + Vector( 0.5f, 0.5f, 0.5f ); + V1View *= Vector( UNIFORM_SCREEN_WIDTH, UNIFORM_SCREEN_HEIGHT, 1.0f ); + V2View = ( V2View * 0.5f ) + Vector( 0.5f, 0.5f, 0.5f ); + V2View *= Vector( UNIFORM_SCREEN_WIDTH, UNIFORM_SCREEN_HEIGHT, 1.0f ); + V3View = ( V3View * 0.5f ) + Vector( 0.5f, 0.5f, 0.5f ); + V3View *= Vector( UNIFORM_SCREEN_WIDTH, UNIFORM_SCREEN_HEIGHT, 1.0f ); + + flTriArea = -TriArea2D( V1View, V2View, V3View ); + if ( flTriArea > 
0.0f ) + { + nFrontFacing++; + + flTriSurfaceArea += flTriArea; + TriAreas[ ( pStrip->indexOffset + i ) / 3 ] = flTriArea; + + Vector2D TexV1 = TextureCoords[ nIndex1 ]; + Vector2D TexV2 = TextureCoords[ nIndex2 ]; + Vector2D TexV3 = TextureCoords[ nIndex3 ]; + + flTriArea = fabs( TriArea2D( TexV1, TexV2, TexV3 ) ); + flTextureSurfaceArea += flTriArea; + TextureAreas[ ( pStrip->indexOffset + i ) / 3 ] = flTriArea; + } + } + + nCount += pStrip->numIndices; + } + +// Msg( "%d / %d / %g / %g ||| %d / %g\n", MeshBuilder.VertexCount(), nCount, flSurfaceArea, flTriSurfaceArea, nFrontFacing, flTriSurfaceArea / (float)nFrontFacing ); + + for( int i = 0; i < MeshBuilder.VertexCount(); i++ ) + { + MeshBuilder.SelectVertex( i ); + MeshBuilder.Position3f( 0.0f, 0.0f, 0.0f ); + } + + MeshBuilder.End(); + pMesh->MarkAsDrawn(); + + pMaterial = materials->FindMaterial( "debug/modelstats", TEXTURE_GROUP_OTHER ); + pRenderContext->Bind( pMaterial ); + + int nRenderCount = -1; + + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t *pStrip = &pGroup->m_pStripData[ j ]; + + for( int i = 0; i < pStrip->numIndices; i += 3 ) + { + if ( nRenderCount >= 10000 || nRenderCount == -1 ) + { + if ( nRenderCount >= 0 ) + { + MeshBuilder.End( false, true ); + } + + pMesh = pRenderContext->GetDynamicMeshEx( false ); + nRenderCount = 0; + + if ( nFrontFacing > 10000 ) + { + MeshBuilder.Begin( pMesh, MATERIAL_TRIANGLES, 10000 ); + nFrontFacing -= 10000; + + } + else + { + MeshBuilder.Begin( pMesh, MATERIAL_TRIANGLES, nFrontFacing ); + } + } + + int nIndex1 = Indexes[ pStrip->indexOffset + i ]; + int nIndex2 = Indexes[ pStrip->indexOffset + i + 1 ]; + int nIndex3 = Indexes[ pStrip->indexOffset + i + 2 ]; + + float flArea = TriAreas[ ( pStrip->indexOffset + i ) / 3 ]; + if ( flArea > 0.0f ) + { + Vector vColor; + + if ( r_studio_stats.GetInt() == 1 ) + { + if ( flArea < 20.0f ) + { + vColor.Init( 1.0f, 0.0f, 0.0f ); + } + else if ( flArea < 50.0f ) + { + vColor.Init( 
1.0f, 0.565f, 0.0f ); + } + else if ( flArea < 100.0f ) + { + vColor.Init( 1.0f, 0.871f, 0.0f ); + } + else if ( flArea < 200.0f ) + { + vColor.Init( 0.701f, 1.0f, 0.0f ); + } + else + { + vColor.Init( 0.0f, 1.0f, 0.0f ); + } + } + else + { + float flArea = TextureAreas[ ( pStrip->indexOffset + i ) / 3 ] / TriAreas[ ( pStrip->indexOffset + i ) / 3 ]; + + if ( flArea >= 16.0f ) + { + vColor.Init( 1.0f, 0.0f, 0.0f ); + } + else if ( flArea >= 8.0f ) + { + vColor.Init( 1.0f, 0.565f, 0.0f ); + } + else if ( flArea >= 4.0f ) + { + vColor.Init( 1.0f, 0.871f, 0.0f ); + } + else if ( flArea >= 2.0f ) + { + vColor.Init( 0.701f, 1.0f, 0.0f ); + } + else if ( flArea >= 1.0f ) + { + vColor.Init( 0.0f, 1.0f, 0.0f ); + } + else + { + vColor.Init( 0.0f, 0.871f, 1.0f ); + } + } + + MeshBuilder.Position3fv( Positions[ nIndex1 ].Base() ); + MeshBuilder.Color3fv( vColor.Base() ); + MeshBuilder.AdvanceVertex(); + + MeshBuilder.Position3fv( Positions[ nIndex2 ].Base() ); + MeshBuilder.Color3fv( vColor.Base() ); + MeshBuilder.AdvanceVertex(); + + MeshBuilder.Position3fv( Positions[ nIndex3 ].Base() ); + MeshBuilder.Color3fv( vColor.Base() ); + MeshBuilder.AdvanceVertex(); + nRenderCount++; + } + } + } + + if ( nRenderCount >= 0 ) + { + MeshBuilder.End( false, true ); + } +} + + + +//----------------------------------------------------------------------------- +// Main model rendering entry point +//----------------------------------------------------------------------------- +void CStudioRender::ModelStats( const DrawModelInfo_t& info, const StudioRenderContext_t &rc, + matrix3x4_t *pBoneToWorld, const FlexWeights_t &flex, int flags ) +{ + StudioRenderContext_t StatsRC = rc; + + StatsRC.m_Config.m_bStatsMode = true; + + m_pRC = const_cast< StudioRenderContext_t* >( &StatsRC ); + m_pFlexWeights = flex.m_pFlexWeights; + m_pFlexDelayedWeights = flex.m_pFlexDelayedWeights; + m_pBoneToWorld = pBoneToWorld; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + // Disable flex if 
we're told to... + bool flexConfig = m_pRC->m_Config.bFlex; + if (flags & STUDIORENDER_DRAW_NO_FLEXES) + { + m_pRC->m_Config.bFlex = false; + } + + // Enable wireframe if we're told to... + bool bWireframe = m_pRC->m_Config.bWireframe; + if ( flags & STUDIORENDER_DRAW_WIREFRAME ) + { + m_pRC->m_Config.bWireframe = true; + } + + int boneMask = BONE_USED_BY_VERTEX_AT_LOD( info.m_Lod ); + + // Preserve the matrices if we're skinning + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->PushMatrix(); + pRenderContext->LoadIdentity(); + + m_VertexCache.StartModel(); + + m_pStudioHdr = info.m_pStudioHdr; + m_pStudioMeshes = info.m_pHardwareData->m_pLODs[info.m_Lod].m_pMeshData; + + // Bone to world must be set before calling drawmodel; it uses that here + ComputePoseToWorld( m_PoseToWorld, m_pStudioHdr, boneMask, m_pRC->m_ViewOrigin, pBoneToWorld ); + + R_StudioRenderModel( pRenderContext, info.m_Skin, info.m_Body, info.m_HitboxSet, info.m_pClientEntity, + info.m_pHardwareData->m_pLODs[info.m_Lod].ppMaterials, + info.m_pHardwareData->m_pLODs[info.m_Lod].pMaterialFlags, flags, boneMask, info.m_Lod, info.m_pColorMeshes); + + // Restore the matrices if we're skinning + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->PopMatrix(); + + // Restore the configs + m_pRC->m_Config.bFlex = flexConfig; + m_pRC->m_Config.bWireframe = bWireframe; + +#ifdef REPORT_FLEX_STATS + GetFlexStats(); +#endif + + StatsRC.m_Config.m_bStatsMode = false; + + pRenderContext->SetNumBoneWeights( 0 ); + m_pRC = NULL; + m_pBoneToWorld = NULL; + m_pFlexWeights = NULL; + m_pFlexDelayedWeights = NULL; +} diff --git a/studiorender/studiorender.cpp b/studiorender/studiorender.cpp new file mode 100644 index 0000000..6749052 --- /dev/null +++ b/studiorender/studiorender.cpp @@ -0,0 +1,762 @@ +//========= Copyright Valve Corporation, All rights reserved. 
============// +// +// Purpose: +// +//===========================================================================// + +#include <stdlib.h> +#include "studiorender.h" +#include "studiorendercontext.h" +#include "materialsystem/imaterialsystem.h" +#include "materialsystem/imaterialsystemhardwareconfig.h" +#include "materialsystem/imaterial.h" +#include "materialsystem/imaterialvar.h" +#include "materialsystem/imesh.h" +#include "optimize.h" +#include "mathlib/vmatrix.h" +#include "tier0/vprof.h" +#include "tier1/strtools.h" +#include "tier1/KeyValues.h" +#include "tier0/memalloc.h" +#include "convar.h" +#include "materialsystem/itexture.h" +#include "tier2/tier2.h" + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + +//----------------------------------------------------------------------------- +// Singleton instance +//----------------------------------------------------------------------------- +CStudioRender g_StudioRender; +CStudioRender *g_pStudioRenderImp = &g_StudioRender; + + +//----------------------------------------------------------------------------- +// Activate to get stats +//----------------------------------------------------------------------------- +//#define REPORT_FLEX_STATS 1 + +#ifdef REPORT_FLEX_STATS +static int s_nModelsDrawn = 0; +static int s_nActiveFlexCount = 0; +static ConVar r_flexstats( "r_flexstats", "0", FCVAR_CHEAT ); +#endif + + +//----------------------------------------------------------------------------- +// Constructor +//----------------------------------------------------------------------------- +CStudioRender::CStudioRender() +{ + m_pRC = NULL; + m_pBoneToWorld = NULL; + m_pFlexWeights = NULL; + m_pFlexDelayedWeights = NULL; + m_pStudioHdr = NULL; + m_pStudioMeshes = NULL; + m_pSubModel = NULL; + m_pGlintTexture = NULL; + m_GlintWidth = 0; + m_GlintHeight = 0; + + // Cache-align our important matrices + MemAlloc_PushAllocDbgInfo( __FILE__, __LINE__ ); + + m_PoseToWorld = 
(matrix3x4_t*)MemAlloc_AllocAligned( MAXSTUDIOBONES * sizeof(matrix3x4_t), 32 ); + m_PoseToDecal = (matrix3x4_t*)MemAlloc_AllocAligned( MAXSTUDIOBONES * sizeof(matrix3x4_t), 32 ); + + MemAlloc_PopAllocDbgInfo(); + m_nDecalId = 1; +} + +CStudioRender::~CStudioRender() +{ + MemAlloc_FreeAligned(m_PoseToWorld); + MemAlloc_FreeAligned(m_PoseToDecal); +} + +void CStudioRender::InitDebugMaterials( void ) +{ + m_pMaterialMRMWireframe = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/debugmrmwireframe", TEXTURE_GROUP_OTHER, true ); + m_pMaterialMRMWireframe->IncrementReferenceCount(); + + m_pMaterialMRMWireframeZBuffer = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/debugmrmwireframezbuffer", TEXTURE_GROUP_OTHER, true ); + m_pMaterialMRMWireframeZBuffer->IncrementReferenceCount(); + + m_pMaterialMRMNormals = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/debugmrmnormals", TEXTURE_GROUP_OTHER, true ); + m_pMaterialMRMNormals->IncrementReferenceCount(); + + m_pMaterialTangentFrame = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/debugvertexcolor", TEXTURE_GROUP_OTHER, true ); + m_pMaterialTangentFrame->IncrementReferenceCount(); + + m_pMaterialTranslucentModelHulls = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/debugtranslucentmodelhulls", TEXTURE_GROUP_OTHER, true ); + m_pMaterialTranslucentModelHulls->IncrementReferenceCount(); + + m_pMaterialSolidModelHulls = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/debugsolidmodelhulls", TEXTURE_GROUP_OTHER, true ); + m_pMaterialSolidModelHulls->IncrementReferenceCount(); + + m_pMaterialAdditiveVertexColorVertexAlpha = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/additivevertexcolorvertexalpha", TEXTURE_GROUP_OTHER, true ); + m_pMaterialAdditiveVertexColorVertexAlpha->IncrementReferenceCount(); + + m_pMaterialModelBones = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/debugmodelbones", 
TEXTURE_GROUP_OTHER, true ); + m_pMaterialModelBones->IncrementReferenceCount(); + + m_pMaterialModelEnvCubemap = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/env_cubemap_model", TEXTURE_GROUP_OTHER, true ); + m_pMaterialModelEnvCubemap->IncrementReferenceCount(); + + m_pMaterialWorldWireframe = + g_pMaterialSystem->FindMaterial( "//platform/materials/debug/debugworldwireframe", TEXTURE_GROUP_OTHER, true ); + m_pMaterialWorldWireframe->IncrementReferenceCount(); + + if( g_pMaterialSystemHardwareConfig->GetDXSupportLevel() >= 90 ) + { + KeyValues *pVMTKeyValues = new KeyValues( "DepthWrite" ); + pVMTKeyValues->SetInt( "$no_fullbright", 1 ); + pVMTKeyValues->SetInt( "$alphatest", 0 ); + pVMTKeyValues->SetInt( "$nocull", 0 ); + m_pDepthWrite[0][0] = g_pMaterialSystem->FindProceduralMaterial( "__DepthWrite00", TEXTURE_GROUP_OTHER, pVMTKeyValues ); + m_pDepthWrite[0][0]->IncrementReferenceCount(); + + pVMTKeyValues = new KeyValues( "DepthWrite" ); + pVMTKeyValues->SetInt( "$no_fullbright", 1 ); + pVMTKeyValues->SetInt( "$alphatest", 0 ); + pVMTKeyValues->SetInt( "$nocull", 1 ); + m_pDepthWrite[0][1] = g_pMaterialSystem->FindProceduralMaterial( "__DepthWrite01", TEXTURE_GROUP_OTHER, pVMTKeyValues ); + m_pDepthWrite[0][1]->IncrementReferenceCount(); + + pVMTKeyValues = new KeyValues( "DepthWrite" ); + pVMTKeyValues->SetInt( "$no_fullbright", 1 ); + pVMTKeyValues->SetInt( "$alphatest", 1 ); + pVMTKeyValues->SetInt( "$nocull", 0 ); + m_pDepthWrite[1][0] = g_pMaterialSystem->FindProceduralMaterial( "__DepthWrite10", TEXTURE_GROUP_OTHER, pVMTKeyValues ); + m_pDepthWrite[1][0]->IncrementReferenceCount(); + + pVMTKeyValues = new KeyValues( "DepthWrite" ); + pVMTKeyValues->SetInt( "$no_fullbright", 1 ); + pVMTKeyValues->SetInt( "$alphatest", 1 ); + pVMTKeyValues->SetInt( "$nocull", 1 ); + m_pDepthWrite[1][1] = g_pMaterialSystem->FindProceduralMaterial( "__DepthWrite11", TEXTURE_GROUP_OTHER, pVMTKeyValues ); + m_pDepthWrite[1][1]->IncrementReferenceCount(); + + 
pVMTKeyValues = new KeyValues( "DepthWrite" ); + pVMTKeyValues->SetInt( "$no_fullbright", 1 ); + pVMTKeyValues->SetInt( "$alphatest", 0 ); + pVMTKeyValues->SetInt( "$nocull", 0 ); + pVMTKeyValues->SetInt( "$color_depth", 1 ); + m_pSSAODepthWrite[0][0] = g_pMaterialSystem->FindProceduralMaterial( "__ColorDepthWrite00", TEXTURE_GROUP_OTHER, pVMTKeyValues ); + m_pSSAODepthWrite[0][0]->IncrementReferenceCount(); + + pVMTKeyValues = new KeyValues( "DepthWrite" ); + pVMTKeyValues->SetInt( "$no_fullbright", 1 ); + pVMTKeyValues->SetInt( "$alphatest", 0 ); + pVMTKeyValues->SetInt( "$nocull", 1 ); + pVMTKeyValues->SetInt( "$color_depth", 1 ); + m_pSSAODepthWrite[0][1] = g_pMaterialSystem->FindProceduralMaterial( "__ColorDepthWrite01", TEXTURE_GROUP_OTHER, pVMTKeyValues ); + m_pSSAODepthWrite[0][1]->IncrementReferenceCount(); + + pVMTKeyValues = new KeyValues( "DepthWrite" ); + pVMTKeyValues->SetInt( "$no_fullbright", 1 ); + pVMTKeyValues->SetInt( "$alphatest", 1 ); + pVMTKeyValues->SetInt( "$nocull", 0 ); + pVMTKeyValues->SetInt( "$color_depth", 1 ); + m_pSSAODepthWrite[1][0] = g_pMaterialSystem->FindProceduralMaterial( "__ColorDepthWrite10", TEXTURE_GROUP_OTHER, pVMTKeyValues ); + m_pSSAODepthWrite[1][0]->IncrementReferenceCount(); + + pVMTKeyValues = new KeyValues( "DepthWrite" ); + pVMTKeyValues->SetInt( "$no_fullbright", 1 ); + pVMTKeyValues->SetInt( "$alphatest", 1 ); + pVMTKeyValues->SetInt( "$nocull", 1 ); + pVMTKeyValues->SetInt( "$color_depth", 1 ); + m_pSSAODepthWrite[1][1] = g_pMaterialSystem->FindProceduralMaterial( "__ColorDepthWrite11", TEXTURE_GROUP_OTHER, pVMTKeyValues ); + m_pSSAODepthWrite[1][1]->IncrementReferenceCount(); + + pVMTKeyValues = new KeyValues( "EyeGlint" ); + m_pGlintBuildMaterial = g_pMaterialSystem->CreateMaterial( "___glintbuildmaterial", pVMTKeyValues ); + } +} + +void CStudioRender::ShutdownDebugMaterials( void ) +{ +#ifdef _WIN32 + if ( m_pMaterialMRMWireframe ) + { + m_pMaterialMRMWireframe->DecrementReferenceCount(); + 
m_pMaterialMRMWireframe = NULL; + } + + if ( m_pMaterialMRMWireframeZBuffer ) + { + m_pMaterialMRMWireframeZBuffer->DecrementReferenceCount(); + m_pMaterialMRMWireframeZBuffer = NULL; + } + + if ( m_pMaterialMRMNormals ) + { + m_pMaterialMRMNormals->DecrementReferenceCount(); + m_pMaterialMRMNormals = NULL; + } + + if ( m_pMaterialTangentFrame ) + { + m_pMaterialTangentFrame->DecrementReferenceCount(); + m_pMaterialTangentFrame = NULL; + } + + if ( m_pMaterialTranslucentModelHulls ) + { + m_pMaterialTranslucentModelHulls->DecrementReferenceCount(); + m_pMaterialTranslucentModelHulls = NULL; + } + + if ( m_pMaterialSolidModelHulls ) + { + m_pMaterialSolidModelHulls->DecrementReferenceCount(); + m_pMaterialSolidModelHulls = NULL; + } + + if ( m_pMaterialAdditiveVertexColorVertexAlpha ) + { + m_pMaterialAdditiveVertexColorVertexAlpha->DecrementReferenceCount(); + m_pMaterialAdditiveVertexColorVertexAlpha = NULL; + } + + if ( m_pMaterialModelBones ) + { + m_pMaterialModelBones->DecrementReferenceCount(); + m_pMaterialModelBones = NULL; + } + + if ( m_pMaterialModelEnvCubemap ) + { + m_pMaterialModelEnvCubemap->DecrementReferenceCount(); + m_pMaterialModelEnvCubemap = NULL; + } + + if ( m_pMaterialWorldWireframe ) + { + m_pMaterialWorldWireframe->DecrementReferenceCount(); + m_pMaterialWorldWireframe = NULL; + } + + // DepthWrite materials + for ( int32 i = 0; i < 4; i++ ) + { + if ( m_pDepthWrite[ ( i & 0x2 ) >> 1 ][ i & 0x1 ] ) + { + m_pDepthWrite[ ( i & 0x2 ) >> 1 ][ i & 0x1 ]->DecrementReferenceCount(); + m_pDepthWrite[ ( i & 0x2 ) >> 1 ][ i & 0x1 ] = NULL; + } + + if ( m_pSSAODepthWrite[ ( i & 0x2 ) >> 1 ][ i & 0x1 ] ) + { + m_pSSAODepthWrite[ ( i & 0x2 ) >> 1 ][ i & 0x1 ]->DecrementReferenceCount(); + m_pSSAODepthWrite[ ( i & 0x2 ) >> 1 ][ i & 0x1 ] = NULL; + } + } + + if ( m_pGlintBuildMaterial ) + { + m_pGlintBuildMaterial->DecrementReferenceCount(); + m_pGlintBuildMaterial = NULL; + } +#endif +} + +static void ReleaseMaterialSystemObjects() +{ +// 
g_StudioRender.UncacheGlint(); +} + +static void RestoreMaterialSystemObjects( int nChangeFlags ) +{ +// g_StudioRender.PrecacheGlint(); +} + + + +//----------------------------------------------------------------------------- +// Init, shutdown +//----------------------------------------------------------------------------- +InitReturnVal_t CStudioRender::Init() +{ + if ( g_pMaterialSystem && g_pMaterialSystemHardwareConfig ) + { + g_pMaterialSystem->AddReleaseFunc( ReleaseMaterialSystemObjects ); + g_pMaterialSystem->AddRestoreFunc( RestoreMaterialSystemObjects ); + + InitDebugMaterials(); + + return INIT_OK; + } + + return INIT_FAILED; +} + +void CStudioRender::Shutdown( void ) +{ + UncacheGlint(); + ShutdownDebugMaterials(); + + if ( g_pMaterialSystem ) + { + g_pMaterialSystem->RemoveReleaseFunc( ReleaseMaterialSystemObjects ); + g_pMaterialSystem->RemoveRestoreFunc( RestoreMaterialSystemObjects ); + } +} + + +//----------------------------------------------------------------------------- +// Sets the lighting render state +//----------------------------------------------------------------------------- +void CStudioRender::SetLightingRenderState() +{ + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + // FIXME: What happens when we use the fixed function pipeline but vertex shaders + // are active? For the time being this only works because everything that does + // vertex lighting does, in fact, have a vertex shader which is used to render it. 
+ pRenderContext->SetAmbientLightCube( m_pRC->m_LightBoxColors ); + + if ( m_pRC->m_Config.bSoftwareLighting || m_pRC->m_NumLocalLights == 0 ) + { + pRenderContext->DisableAllLocalLights(); + } + else + { + int nMaxLightCount = g_pMaterialSystemHardwareConfig->MaxNumLights(); + LightDesc_t desc; + desc.m_Type = MATERIAL_LIGHT_DISABLE; + + int i; + int nLightCount = min( m_pRC->m_NumLocalLights, nMaxLightCount ); + for( i = 0; i < nLightCount; ++i ) + { + pRenderContext->SetLight( i, m_pRC->m_LocalLights[i] ); + } + for( ; i < nMaxLightCount; ++i ) + { + pRenderContext->SetLight( i, desc ); + } + } +} + + +//----------------------------------------------------------------------------- +// Shadow state (affects the models as they are rendered) +//----------------------------------------------------------------------------- +void CStudioRender::AddShadow( IMaterial* pMaterial, void* pProxyData, FlashlightState_t *pFlashlightState, VMatrix *pWorldToTexture, ITexture *pFlashlightDepthTexture ) +{ + int i = m_ShadowState.AddToTail(); + ShadowState_t& state = m_ShadowState[i]; + state.m_pMaterial = pMaterial; + state.m_pProxyData = pProxyData; + state.m_pFlashlightState = pFlashlightState; + state.m_pWorldToTexture = pWorldToTexture; + state.m_pFlashlightDepthTexture = pFlashlightDepthTexture; +} + +void CStudioRender::ClearAllShadows() +{ + m_ShadowState.RemoveAll(); +} + +void CStudioRender::GetFlexStats( ) +{ +#ifdef REPORT_FLEX_STATS + static bool s_bLastFlexStats = false; + bool bDoStats = r_flexstats.GetInt() != 0; + if ( bDoStats ) + { + if ( !s_bLastFlexStats ) + { + s_nModelsDrawn = 0; + s_nActiveFlexCount = 0; + } + + // Count number of active weights + int nActiveFlexCount = 0; + for ( int i = 0; i < MAXSTUDIOFLEXDESC; ++i ) + { + if ( fabs( m_FlexWeights[i] ) >= 0.001f || fabs( m_FlexDelayedWeights[i] ) >= 0.001f ) + { + ++nActiveFlexCount; + } + } + + ++s_nModelsDrawn; + s_nActiveFlexCount += nActiveFlexCount; + } + else + { + if ( s_bLastFlexStats ) + { + if 
( s_nModelsDrawn ) + { + Msg( "Average number of flexes/model: %d\n", s_nActiveFlexCount / s_nModelsDrawn ); + } + else + { + Msg( "No models rendered to take stats of\n" ); + } + + s_nModelsDrawn = 0; + s_nActiveFlexCount = 0; + } + } + + s_bLastFlexStats = bDoStats; +#endif +} + + +//----------------------------------------------------------------------------- +// Main model rendering entry point +//----------------------------------------------------------------------------- +void CStudioRender::DrawModel( const DrawModelInfo_t& info, const StudioRenderContext_t &rc, + matrix3x4_t *pBoneToWorld, const FlexWeights_t &flex, int flags ) +{ + if ( ( flags & STUDIORENDER_GENERATE_STATS ) != 0 ) + { + ModelStats( info, rc, pBoneToWorld, flex, flags ); + + return; + } + + VPROF( "CStudioRender::DrawModel"); + + m_pRC = const_cast< StudioRenderContext_t* >( &rc ); + m_pFlexWeights = flex.m_pFlexWeights; + m_pFlexDelayedWeights = flex.m_pFlexDelayedWeights; + m_pBoneToWorld = pBoneToWorld; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + // Disable flex if we're told to... + bool flexConfig = m_pRC->m_Config.bFlex; + if (flags & STUDIORENDER_DRAW_NO_FLEXES) + { + m_pRC->m_Config.bFlex = false; + } + + // Enable wireframe if we're told to... + bool bWireframe = m_pRC->m_Config.bWireframe; + if ( flags & STUDIORENDER_DRAW_WIREFRAME ) + { + m_pRC->m_Config.bWireframe = true; + } + + int boneMask = BONE_USED_BY_VERTEX_AT_LOD( info.m_Lod ); + + // Preserve the matrices if we're skinning + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->PushMatrix(); + pRenderContext->LoadIdentity(); + + m_VertexCache.StartModel(); + + m_pStudioHdr = info.m_pStudioHdr; + if ( !info.m_pHardwareData->m_pLODs ) + { + // If we are missing LODs then print the model name before returning + // so we can perhaps correct the underlying problem. 
+ Msg( "Missing LODs for %s, lod index is %d.\n", m_pStudioHdr->pszName(), info.m_Lod ); + return; + } + m_pStudioMeshes = info.m_pHardwareData->m_pLODs[info.m_Lod].m_pMeshData; + + // Bone to world must be set before calling drawmodel; it uses that here + ComputePoseToWorld( m_PoseToWorld, m_pStudioHdr, boneMask, m_pRC->m_ViewOrigin, pBoneToWorld ); + + R_StudioRenderModel( pRenderContext, info.m_Skin, info.m_Body, info.m_HitboxSet, info.m_pClientEntity, + info.m_pHardwareData->m_pLODs[info.m_Lod].ppMaterials, + info.m_pHardwareData->m_pLODs[info.m_Lod].pMaterialFlags, flags, boneMask, info.m_Lod, info.m_pColorMeshes); + + // Draw all the decals on this model + // If the model is not in memory, this code may not function correctly + // This code assumes the model has been rendered! + // So skip if the model hasn't been rendered + // Also, skip if we're rendering to the shadow depth map + if ( ( m_pStudioMeshes != 0 ) && !( flags & ( STUDIORENDER_SHADOWDEPTHTEXTURE | STUDIORENDER_SSAODEPTHTEXTURE )) ) + { + if ((flags & STUDIORENDER_DRAW_GROUP_MASK) != STUDIORENDER_DRAW_TRANSLUCENT_ONLY) + { + DrawDecal( info, info.m_Lod, info.m_Body ); + } + + // Draw shadows + if ( !( flags & STUDIORENDER_DRAW_NO_SHADOWS ) ) + { + DrawShadows( info, flags, boneMask ); + } + + if( (flags & STUDIORENDER_DRAW_GROUP_MASK) != STUDIORENDER_DRAW_TRANSLUCENT_ONLY && + !( flags & STUDIORENDER_DRAW_NO_SHADOWS ) ) + { + DrawFlashlightDecals( info, info.m_Lod ); + } + } + + // Restore the matrices if we're skinning + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->PopMatrix(); + + // Restore the configs + m_pRC->m_Config.bFlex = flexConfig; + m_pRC->m_Config.bWireframe = bWireframe; + +#ifdef REPORT_FLEX_STATS + GetFlexStats(); +#endif + + pRenderContext->SetNumBoneWeights( 0 ); + m_pRC = NULL; + m_pBoneToWorld = NULL; + m_pFlexWeights = NULL; + m_pFlexDelayedWeights = NULL; +} + +void CStudioRender::DrawModelStaticProp( const DrawModelInfo_t& info, + const 
StudioRenderContext_t &rc, const matrix3x4_t& rootToWorld, int flags ) +{ + VPROF( "CStudioRender::DrawModelStaticProp"); + + m_pRC = const_cast<StudioRenderContext_t*>( &rc ); + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + memcpy( &m_StaticPropRootToWorld, &rootToWorld, sizeof(matrix3x4_t) ); + memcpy( &m_PoseToWorld[0], &rootToWorld, sizeof(matrix3x4_t) ); + m_pBoneToWorld = &m_StaticPropRootToWorld; + + bool flexConfig = m_pRC->m_Config.bFlex; + m_pRC->m_Config.bFlex = false; + bool bWireframe = m_pRC->m_Config.bWireframe; + if ( flags & STUDIORENDER_DRAW_WIREFRAME ) + { + m_pRC->m_Config.bWireframe = true; + } + + int lod = info.m_Lod; + m_pStudioHdr = info.m_pStudioHdr; + m_pStudioMeshes = info.m_pHardwareData->m_pLODs[lod].m_pMeshData; + + if ( ( flags & STUDIORENDER_GENERATE_STATS ) != 0 ) + { + FlexWeights_t flex; + + ModelStats( info, rc, m_pBoneToWorld, flex, flags | STUDIORENDER_DRAW_NO_FLEXES ); + + return; + } + + R_StudioRenderModel( pRenderContext, info.m_Skin, info.m_Body, info.m_HitboxSet, info.m_pClientEntity, + info.m_pHardwareData->m_pLODs[lod].ppMaterials, + info.m_pHardwareData->m_pLODs[lod].pMaterialFlags, flags, BONE_USED_BY_ANYTHING, lod, info.m_pColorMeshes); + + // If we're not shadow depth mapping + if ( ( flags & ( STUDIORENDER_SHADOWDEPTHTEXTURE | STUDIORENDER_SSAODEPTHTEXTURE ) ) == 0 ) + { + // FIXME: Should this occur in a separate call? 
+ // Draw all the decals on this model + if ((flags & STUDIORENDER_DRAW_GROUP_MASK) != STUDIORENDER_DRAW_TRANSLUCENT_ONLY) + { + DrawDecal( info, lod, info.m_Body ); + } + + // Draw shadows + if ( !( flags & STUDIORENDER_DRAW_NO_SHADOWS ) ) + { + DrawShadows( info, flags, BONE_USED_BY_ANYTHING ); + } + + if( (flags & STUDIORENDER_DRAW_GROUP_MASK) != STUDIORENDER_DRAW_TRANSLUCENT_ONLY && + !( flags & STUDIORENDER_DRAW_NO_SHADOWS ) ) + { + DrawFlashlightDecals( info, lod ); + } + } + + // Restore the configs + m_pRC->m_Config.bFlex = flexConfig; + m_pRC->m_Config.bWireframe = bWireframe; + + pRenderContext->SetNumBoneWeights( 0 ); + m_pBoneToWorld = NULL; + m_pRC = NULL; +} + + + + +// UNDONE: Currently no flex supported, no per instance cubemap or other lighting state supported, no eyeballs supported +// NOTE: This is a fast path for simple models with skeletons but not many other features +void CStudioRender::DrawModelArray( const DrawModelInfo_t &drawInfo, const StudioRenderContext_t &rc, int arrayCount, model_array_instance_t *pInstanceData, int instanceStride, int flags ) +{ + tmZone( TELEMETRY_LEVEL0, TMZF_NONE, "%s %d", __FUNCTION__, arrayCount ); + +#ifndef SWDS // no drawing on dedicated server +#if 0 + FlexWeights_t flex; + memset(&flex, 0, sizeof(flex)); + for ( int i = 0; i < arrayCount; i++ ) + { + DrawModel( drawInfo, rc, &pInstanceData[i].modelToWorld, flex, flags ); + } + return; +#endif + + m_pRC = const_cast< StudioRenderContext_t* >( &rc ); + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + // Preserve the matrices if we're skinning + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->PushMatrix(); + pRenderContext->LoadIdentity(); + pRenderContext->SetNumBoneWeights( 0 ); + + // get the studio mesh data for this lod + studiomeshdata_t *pMeshDataBase = drawInfo.m_pHardwareData->m_pLODs[drawInfo.m_Lod].m_pMeshData; + IMaterial **ppMaterials = drawInfo.m_pHardwareData->m_pLODs[drawInfo.m_Lod].ppMaterials; + int 
*pMaterialFlags = drawInfo.m_pHardwareData->m_pLODs[drawInfo.m_Lod].pMaterialFlags; + studiohdr_t *pStudioHdr = drawInfo.m_pStudioHdr; + m_bDrawTranslucentSubModels = false; + + int skin = drawInfo.m_Skin; + short *pskinref = pStudioHdr->pSkinref( 0 ); + if ( skin > 0 && skin < pStudioHdr->numskinfamilies ) + { + pskinref += ( skin * pStudioHdr->numskinref ); + } + + for ( int body = 0; body < pStudioHdr->numbodyparts; ++body ) + { + mstudiobodyparts_t *pbodypart = pStudioHdr->pBodypart( body ); + + int index = drawInfo.m_Body / pbodypart->base; + index = index % pbodypart->nummodels; + mstudiomodel_t *pSubmodel = pbodypart->pModel( index ); + + + for ( int meshIndex = 0; meshIndex < pSubmodel->nummeshes; ++meshIndex ) + { + mstudiomesh_t *pmesh = pSubmodel->pMesh(meshIndex); + studiomeshdata_t *pMeshData = &pMeshDataBase[pmesh->meshid]; + Assert( pMeshData ); + + if ( !pMeshData->m_NumGroup ) + continue; + + if ( !pMaterialFlags ) + continue; + + StudioModelLighting_t lighting = LIGHTING_HARDWARE; + int materialFlags = pMaterialFlags[pskinref[pmesh->material]]; + + IMaterial* pMaterial = R_StudioSetupSkinAndLighting( pRenderContext, pskinref[ pmesh->material ], ppMaterials, materialFlags, drawInfo.m_pClientEntity, NULL, lighting ); + if ( !pMaterial ) + continue; + + // eyeball! can't do those in array mode yet + Assert( pmesh->materialtype != 1 ); + //R_StudioDrawMesh( pRenderContext, pmesh, pMeshData, lighting, pMaterial, NULL, drawInfo.m_Lod ); + // Draw all the various mesh groups... 
+ for ( int meshGroupIndex = 0; meshGroupIndex < pMeshData->m_NumGroup; ++meshGroupIndex ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[meshGroupIndex]; + + // Older models are merely flexed while new ones are also delta flexed + Assert(!(pGroup->m_Flags & MESHGROUP_IS_FLEXED)); + Assert(!(pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED)); + IMesh *pMesh = pGroup->m_pMesh; + + // Needed when we switch back and forth between hardware + software lighting + if ( IsPC() && pGroup->m_MeshNeedsRestore ) + { + VertexCompressionType_t compressionType = CompressionType( pMesh->GetVertexFormat() ); + switch ( compressionType ) + { + case VERTEX_COMPRESSION_ON: + R_StudioRestoreMesh<VERTEX_COMPRESSION_ON>( pmesh, pGroup ); + case VERTEX_COMPRESSION_NONE: + default: + R_StudioRestoreMesh<VERTEX_COMPRESSION_NONE>( pmesh, pGroup ); + break; + } + pGroup->m_MeshNeedsRestore = false; + } + pMesh->SetColorMesh( NULL, 0 ); + + MaterialPrimitiveType_t stripType = MATERIAL_TRIANGLES; + pMesh->SetPrimitiveType(stripType); + if ( pStudioHdr->numbones > 1 ) + { + byte *pData = (byte *)pInstanceData; + for ( int i = 0;i < arrayCount; i++, pData += instanceStride ) + { + matrix3x4_t *pBones = &( ((model_array_instance_t *)pData)->modelToWorld ); + pRenderContext->LoadMatrix( pBones[0] ); + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t* pStrip = &pGroup->m_pStripData[j]; + // Reset bone state if we're hardware skinning + pRenderContext->SetNumBoneWeights( pStrip->numBones ); + for (int k = 0; k < pStrip->numBoneStateChanges; ++k) + { + OptimizedModel::BoneStateChangeHeader_t* pStateChange = pStrip->pBoneStateChange(k); + if ( pStateChange->newBoneID < 0 ) + break; + + pRenderContext->LoadBoneMatrix( pStateChange->hardwareID, pBones[pStateChange->newBoneID] ); + } + MaterialPrimitiveType_t localStripType = pStrip->flags & OptimizedModel::STRIP_IS_TRISTRIP ? 
MATERIAL_TRIANGLE_STRIP : MATERIAL_TRIANGLES; + + if ( localStripType != stripType ) + { + pMesh->SetPrimitiveType( localStripType ); + stripType = localStripType; + } + pMesh->Draw( pStrip->indexOffset, pStrip->numIndices ); + } + } + pRenderContext->SetNumBoneWeights( 0 ); + } + else + { + byte *pData = (byte *)pInstanceData; + for ( int i = 0;i < arrayCount; i++, pData += instanceStride ) + { + matrix3x4_t *pBones = &( ((model_array_instance_t *)pData)->modelToWorld ); + pRenderContext->LoadMatrix( pBones[0] ); + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t* pStrip = &pGroup->m_pStripData[j]; + MaterialPrimitiveType_t localStripType = pStrip->flags & OptimizedModel::STRIP_IS_TRISTRIP ? MATERIAL_TRIANGLE_STRIP : MATERIAL_TRIANGLES; + + if ( localStripType != stripType ) + { + pMesh->SetPrimitiveType( localStripType ); + stripType = localStripType; + } + pMesh->Draw( pStrip->indexOffset, pStrip->numIndices ); + } + } + } + } + } + } + + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->PopMatrix(); +#endif +} + diff --git a/studiorender/studiorender.h b/studiorender/studiorender.h new file mode 100644 index 0000000..50a875a --- /dev/null +++ b/studiorender/studiorender.h @@ -0,0 +1,931 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//===========================================================================// + +#ifndef CSTUDIORENDER_H +#define CSTUDIORENDER_H +#ifdef _WIN32 +#pragma once +#endif + +#include "istudiorender.h" +#include "studio.h" +#include "materialsystem/imaterialsystem.h" // for LightDesc_t +// wouldn't have to include these if it weren't for inlines. 
+#include "materialsystem/imaterial.h" +#include "mathlib/mathlib.h" +#include "utllinkedlist.h" +#include "utlvector.h" +#include "tier1/utllinkedlist.h" +#include "flexrenderdata.h" +#include "mathlib/compressed_vector.h" +#include "r_studiolight.h" +#if defined( _WIN32 ) && !defined( _X360 ) +#include <xmmintrin.h> +#endif +#include "tier0/dbg.h" + + +//----------------------------------------------------------------------------- +// Forward declarations +//----------------------------------------------------------------------------- +class ITexture; +class CPixelWriter; +class CMeshBuilder; +class IMaterialVar; +struct mstudioeyeball_t; +struct eyeballstate_t; +struct lightpos_t; +struct dworldlight_t; +struct DecalClipState_t; +class CStudioRender; +struct StudioRenderContext_t; +struct FlexWeights_t; + +namespace OptimizedModel +{ + struct FileHeader_t; + struct MeshHeader_t; + struct StripGroupHeader_t; + struct Vertex_t; + struct ModelLODHeader_t; +} + + +//----------------------------------------------------------------------------- +// FIXME: Remove +//----------------------------------------------------------------------------- +class IStudioDataCache; +extern IStudioDataCache *g_pStudioDataCache; + + +//----------------------------------------------------------------------------- +// Singleton +//----------------------------------------------------------------------------- +extern CStudioRender g_StudioRender; + + +//----------------------------------------------------------------------------- +// Defines + structs +//----------------------------------------------------------------------------- +#define MAXLOCALLIGHTS 4 +#define MAXLIGHTCOMPUTE 16 + +enum StudioModelLighting_t +{ + LIGHTING_HARDWARE = 0, + LIGHTING_SOFTWARE, + LIGHTING_MOUTH +}; + +struct lightpos_t +{ + Vector delta; // unit vector from vertex to light + float falloff; // light distance falloff + float dot; // light direction * delta; + + lightpos_t() {} + +private: + // Copy 
constructors are not allowed + lightpos_t( const lightpos_t& src ); +}; + +struct eyeballstate_t +{ + const mstudioeyeball_t *peyeball; + + matrix3x4_t mat; + + Vector org; // world center of eyeball + Vector forward; + Vector right; + Vector up; + + Vector cornea; // world center of cornea + + eyeballstate_t() {} + +private: + // Copy constructors are not allowed + eyeballstate_t( const eyeballstate_t& src ); +}; + + +//----------------------------------------------------------------------------- +// Store decal vertex data here +//----------------------------------------------------------------------------- +#pragma pack(1) +struct DecalVertex_t +{ + mstudiomesh_t *GetMesh( studiohdr_t *pHdr ) + { + if ((m_Body == 0xFFFF) || (m_Model == 0xFFFF) || (m_Mesh == 0xFFFF)) + return NULL; + + mstudiobodyparts_t *pBody = pHdr->pBodypart( m_Body ); + mstudiomodel_t *pModel = pBody->pModel( m_Model ); + return pModel->pMesh( m_Mesh ); + } + + IMorph *GetMorph( studiohdr_t *pHdr, studiomeshdata_t *pStudioMeshes ) + { + if ( (m_Body == 0xFFFF) || (m_Model == 0xFFFF) || (m_Mesh == 0xFFFF) || (m_Group == 0xFFFF) ) + return NULL; + + mstudiobodyparts_t *pBody = pHdr->pBodypart( m_Body ); + mstudiomodel_t *pModel = pBody->pModel( m_Model ); + mstudiomesh_t *pMesh = pModel->pMesh( m_Mesh ); + studiomeshdata_t* pMeshData = &pStudioMeshes[pMesh->meshid]; + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[m_Group]; + return pGroup->m_pMorph; + } + + // NOTE: m_Group + m_GroupIndex is necessary only for decals on + // hardware morphs. 
If COMPACT_DECAL_VERT is for console, we + // could remove group index + group +#ifdef COMPACT_DECAL_VERT + Vector m_Position; // 12 + Vector2d32 m_TexCoord; // 16 + Vector48 m_Normal; // 22 (packed to m_Body) + + byte m_Body; // 24 + byte m_Model; + unsigned short m_MeshVertexIndex; // index into the mesh's vertex list + unsigned short m_Mesh; + unsigned short m_GroupIndex; // index into the mesh's vertex list + unsigned short m_Group; +#else + Vector m_Position; + Vector m_Normal; + Vector2D m_TexCoord; + + unsigned short m_MeshVertexIndex; // index into the mesh's vertex list + unsigned short m_Body; + unsigned short m_Model; + unsigned short m_Mesh; + unsigned short m_GroupIndex; // index into the group's index list + unsigned short m_Group; +#endif + + DecalVertex_t() {} + DecalVertex_t( const DecalVertex_t& src ) + { + m_Position = src.m_Position; + m_Normal = src.m_Normal; + m_TexCoord = src.m_TexCoord; + m_MeshVertexIndex = src.m_MeshVertexIndex; + m_Body = src.m_Body; + m_Model = src.m_Model; + m_Mesh = src.m_Mesh; + m_GroupIndex = src.m_GroupIndex; + m_Group = src.m_Group; + } +}; +#pragma pack() + + +//----------------------------------------------------------------------------- +// Temporary meshes +//----------------------------------------------------------------------------- +struct MeshVertexInfo_t +{ + mstudiomesh_t *m_pMesh; + int m_nIndex; +}; + + +//----------------------------------------------------------------------------- +// Vertex prefetch count for software skinning +//----------------------------------------------------------------------------- +enum +{ + PREFETCH_VERT_COUNT = 4 +}; + + +//----------------------------------------------------------------------------- +// Class that actually renders stuff +//----------------------------------------------------------------------------- +class CStudioRender +{ +public: + CStudioRender(); + ~CStudioRender(); + + // Init, shutdown + InitReturnVal_t Init(); + void Shutdown( void ); + + void 
EnableScissor( FlashlightState_t *state ); + void DisableScissor(); + + void DrawModel( const DrawModelInfo_t& info, const StudioRenderContext_t& rc, matrix3x4_t *pBoneToWorld, const FlexWeights_t& flex, int flags = STUDIORENDER_DRAW_ENTIRE_MODEL ); + void DrawModelArray( const DrawModelInfo_t &drawInfo, const StudioRenderContext_t &rc, int arrayCount, model_array_instance_t *pInstanceData, int instanceStride, int flags = STUDIORENDER_DRAW_ENTIRE_MODEL ); + + // Static-prop related draw methods + void DrawModelStaticProp( const DrawModelInfo_t& info, const StudioRenderContext_t &rc, const matrix3x4_t &modelToWorld, int flags = STUDIORENDER_DRAW_ENTIRE_MODEL ); + void DrawStaticPropShadows( const DrawModelInfo_t &drawInfo, const StudioRenderContext_t &rc, const matrix3x4_t &modelToWorld, int flags ); + void DrawStaticPropDecals( const DrawModelInfo_t &drawInfo, const StudioRenderContext_t &rc, const matrix3x4_t &modelToWorld ); + + void ModelStats( const DrawModelInfo_t& info, const StudioRenderContext_t &rc, matrix3x4_t *pBoneToWorld, const FlexWeights_t &flex, int flags ); + + // Create, destroy list of decals for a particular model + StudioDecalHandle_t CreateDecalList( studiohwdata_t *pHardwareData ); + void DestroyDecalList( StudioDecalHandle_t handle ); + + // Add decals to a decal list by doing a planar projection along the ray + void AddDecal( StudioDecalHandle_t handle, const StudioRenderContext_t& rc, matrix3x4_t *pBoneToWorld, studiohdr_t *pStudioHdr, + const Ray_t & ray, const Vector& decalUp, IMaterial* pDecalMaterial, + float radius, int body, bool noPokethru, int maxLODToDecal = ADDDECAL_TO_ALL_LODS ); + + // Shadow state (affects the models as they are rendered) + void AddShadow( IMaterial* pMaterial, void* pProxyData, FlashlightState_t *pFlashlightState, VMatrix *pWorldToTexture, ITexture *pFlashlightDepthTexture ); + void ClearAllShadows(); + + // Release/restore material system objects + void PrecacheGlint(); + void UncacheGlint(); + + // Get the 
config + void R_MouthComputeLightingValues( float& fIllum, Vector& forward ); + void R_MouthLighting( float fIllum, const Vector& normal, const Vector& forward, Vector& light ); + + // Performs the lighting computation + inline void R_ComputeLightAtPoint3( const Vector &pos, const Vector &norm, Vector &color ); + +#if defined( _WIN32 ) && !defined( _X360 ) + // sse-ized lighting pipeline. lights 4 vertices at once + inline void R_ComputeLightAtPoints3( const FourVectors &pos, const FourVectors &norm, FourVectors &color ); + void R_MouthLighting( __m128 fIllum, const FourVectors& normal, const FourVectors& forward, FourVectors& light ); +#endif + +private: + enum + { + DECAL_DYNAMIC = 0x1, + DECAL_SECONDPASS = 0x2, + }; + + typedef unsigned short DecalId_t; + + struct Decal_t + { + int m_IndexCount; + int m_VertexCount; + float m_FadeStartTime; + float m_FadeDuration; + int m_Flags; + }; + + struct DecalHistory_t + { + unsigned short m_Material; + unsigned short m_Decal; + DecalId_t m_nId; + unsigned short m_nPad; + }; + + typedef CUtlLinkedList<DecalVertex_t, unsigned short> DecalVertexList_t; + + typedef CUtlVector<unsigned short> DecalIndexList_t; + typedef CUtlLinkedList<Decal_t, unsigned short> DecalList_t; + typedef CUtlLinkedList<DecalHistory_t, unsigned short> DecalHistoryList_t; + + struct DecalMaterial_t + { + IMaterial* m_pMaterial; + DecalIndexList_t m_Indices; + DecalVertexList_t m_Vertices; + DecalList_t m_Decals; + }; + + struct DecalLod_t + { + unsigned short m_FirstMaterial; + DecalHistoryList_t m_DecalHistory; + }; + + struct DecalModelList_t + { + studiohwdata_t* m_pHardwareData; + DecalLod_t* m_pLod; + int m_nLods; // need to retain because hardware data could be flushed + }; + + // A temporary structure used to figure out new decal verts + struct DecalBuildVertexInfo_t + { + enum + { + FRONT_FACING = 0x1, + VALID_AREA = 0x2, // If you change this, change ProjectDecalOntoMesh + }; + + Vector2D m_UV; + unsigned short m_VertexIndex; // index into 
the DecalVertex_t list + unsigned char m_UniqueID; + unsigned char m_Flags; + + private: + // No copy constructors + DecalBuildVertexInfo_t( const DecalBuildVertexInfo_t &src ); + }; + + struct DecalBuildInfo_t + { + IMaterial **m_ppMaterials; + studiohdr_t *m_pStudioHdr; + mstudiomesh_t *m_pMesh; + studiomeshdata_t *m_pMeshData; + DecalMaterial_t *m_pDecalMaterial; + MeshVertexInfo_t *m_pMeshVertices; + const mstudio_meshvertexdata_t *m_pMeshVertexData; + const thinModelVertices_t *m_pMeshThinVertexData; + int m_nGlobalMeshIndex; + DecalBuildVertexInfo_t *m_pVertexBuffer; + float m_Radius; + DecalBuildVertexInfo_t *m_pVertexInfo; + int m_Body; + int m_Model; + int m_Mesh; + int m_Group; + DecalVertexList_t::IndexType_t m_FirstVertex; + unsigned short m_VertexCount; + bool m_UseClipVert; + bool m_NoPokeThru; + }; + + struct ShadowState_t + { + IMaterial* m_pMaterial; + void* m_pProxyData; + FlashlightState_t * m_pFlashlightState; + VMatrix * m_pWorldToTexture; + ITexture * m_pFlashlightDepthTexture; + }; + + struct BodyPartInfo_t + { + int m_nSubModelIndex; + mstudiomodel_t *m_pSubModel; + }; + + struct GlintRenderData_t + { + Vector2D m_vecPosition; + Vector m_vecIntensity; + }; + + // Global LRU for model decals + struct DecalLRU_t + { + StudioDecalHandle_t m_hDecalHandle; + DecalId_t m_nDecalId; + }; + + typedef CUtlFixedLinkedList< DecalLRU_t >::IndexType_t DecalLRUListIndex_t; + +private: + void SetLightingRenderState(); + + int R_StudioRenderModel( IMatRenderContext *pRenderContext, int skin, int body, int hitboxset, void /*IClientEntity*/ *pEntity, + IMaterial **ppMaterials, int *pMaterialFlags, int flags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes = NULL ); + IMaterial* R_StudioSetupSkinAndLighting( IMatRenderContext *pRenderContext, int index, IMaterial **ppMaterials, int materialFlags, + void /*IClientEntity*/ *pClientEntity, ColorMeshInfo_t *pColorMeshes, StudioModelLighting_t &lighting ); + int R_StudioDrawEyeball( IMatRenderContext 
*pRenderContext, mstudiomesh_t* pmesh, studiomeshdata_t* pMeshData, + StudioModelLighting_t lighting, IMaterial *pMaterial, int lod ); + int R_StudioDrawPoints( IMatRenderContext *pRenderContext, int skin, void /*IClientEntity*/ *pClientEntity, + IMaterial **ppMaterials, int *pMaterialFlags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes ); + int R_StudioDrawMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, studiomeshdata_t* pMeshData, + StudioModelLighting_t lighting, IMaterial *pMaterial, ColorMeshInfo_t *pColorMeshes, int lod ); + int R_StudioRenderFinal( IMatRenderContext *pRenderContext, + int skin, int nBodyPartCount, BodyPartInfo_t *pBodyPartInfo, void /*IClientEntity*/ *pClientEntity, + IMaterial **ppMaterials, int *pMaterialFlags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes = NULL ); + int R_StudioDrawStaticMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, + studiomeshgroup_t* pGroup, StudioModelLighting_t lighting, float r_blend, IMaterial* pMaterial, + int lod, ColorMeshInfo_t *pColorMeshes ); + int R_StudioDrawDynamicMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, + studiomeshgroup_t* pGroup, StudioModelLighting_t lighting, + float r_blend, IMaterial* pMaterial, int lod ); + int R_StudioDrawGroupHWSkin( IMatRenderContext *pRenderContext, studiomeshgroup_t* pGroup, IMesh* pMesh, ColorMeshInfo_t *pColorMeshInfo = NULL ); + int R_StudioDrawGroupSWSkin( studiomeshgroup_t* pGroup, IMesh* pMesh ); + void R_StudioDrawHulls( int hitboxset, bool translucent ); + void R_StudioDrawBones (void); + void R_StudioVertBuffer( void ); + void DrawNormal( const Vector& pos, float scale, const Vector& normal, const Vector& color ); + void BoneMatToMaterialMat( matrix3x4_t& boneMat, float materialMat[4][4] ); + + // Various inner-loop methods + void R_StudioSoftwareProcessMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh, StudioModelLighting_t lighting, bool doFlex, 
float r_blend, + bool bNeedsTangentSpace, bool bDX8Vertex, IMaterial *pMaterial ); + + void R_StudioSoftwareProcessMesh_Normals( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh, StudioModelLighting_t lighting, bool doFlex, float r_blend, + bool bShowNormals, bool bShowTangentFrame ); + + template< class T > + void ComputeFlexedVertex_StreamOffset( mstudioflex_t *pflex, T *pvanim, int vertCount, float w1, float w2, float w3, float w4 ); + + void R_StudioProcessFlexedMesh_StreamOffset( mstudiomesh_t* pmesh, int lod ); + + template <VertexCompressionType_t T> void FillFlexMeshGroupVB( CMeshBuilder & meshBuilder, studiomeshgroup_t *pGroup ); + void R_StudioFlexMeshGroup( studiomeshgroup_t *pGroup ); + + template<VertexCompressionType_t T> void R_StudioRestoreMesh( mstudiomesh_t* pmesh, studiomeshgroup_t* pMeshData ); + void R_StudioProcessFlexedMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh ); + + // Eye rendering using vertex shaders + void SetEyeMaterialVars( IMaterial* pMaterial, mstudioeyeball_t* peyeball, + const Vector& eyeOrigin, const matrix3x4_t& irisTransform, const matrix3x4_t& glintTransform ); + + void ComputeEyelidStateFACS( mstudiomodel_t *pSubModel ); + + void R_StudioEyelidFACS( const mstudioeyeball_t *peyeball, const eyeballstate_t *pstate ); + + void R_StudioEyeballPosition( const mstudioeyeball_t *peyeball, eyeballstate_t *pstate ); + + // Computes the texture projection matrix for the glint texture + void ComputeGlintTextureProjection( eyeballstate_t const* pState, + const Vector& vright, const Vector& vup, matrix3x4_t& mat ); + + void R_StudioEyeballGlint( const eyeballstate_t *pstate, IMaterialVar *pGlintTextureVar, + const Vector& vright, const Vector& vup, const Vector& r_origin ); + ITexture* RenderGlintTexture( const eyeballstate_t *pstate, + const Vector& vright, const Vector& vup, const Vector& r_origin ); + + int BuildGlintRenderData( 
GlintRenderData_t *pData, int nMaxGlints, + const eyeballstate_t *pstate, const Vector& vright, const Vector& vup, const Vector& r_origin ); + void R_MouthSetupVertexShader( IMaterial* pMaterial ); + + // Computes a vertex format to use + VertexFormat_t ComputeSWSkinVertexFormat( IMaterial *pMaterial ) const; + + inline bool R_TeethAreVisible( void ) + { + return true; + /* + // FIXME: commented out until Gary can change them to just draw black + mstudiomouth_t *pMouth = m_pStudioHdr->pMouth( 0 ); + float fIllum = m_FlexWeights[pMouth->flexdesc]; + return fIllum > 0.0f; + */ + } + + inline StudioModelLighting_t R_StudioComputeLighting( IMaterial *pMaterial, int materialFlags, ColorMeshInfo_t *pColorMeshes ); + inline void R_StudioTransform( Vector& in1, mstudioboneweight_t *pboneweight, Vector& out1 ); + inline void R_StudioRotate( Vector& in1, mstudioboneweight_t *pboneweight, Vector& out1 ); + inline void R_StudioRotate( Vector4D& in1, mstudioboneweight_t *pboneweight, Vector4D& out1 ); + inline void R_StudioEyeballNormal( mstudioeyeball_t const* peyeball, Vector& org, + Vector& pos, Vector& normal ); + void MaterialPlanerProjection( const matrix3x4_t& mat, int count, const Vector *psrcverts, Vector2D *pdesttexcoords ); + void AddGlint( CPixelWriter &pixelWriter, float x, float y, const Vector& color ); + + // Methods associated with lighting + int R_LightGlintPosition( int index, const Vector& org, Vector& delta, Vector& intensity ); + void R_LightEffectsWorld( const lightpos_t *light, const Vector& normal, const Vector &src, Vector &dest ); + + void R_GatherStats( studiomeshgroup_t *pGroup, CMeshBuilder &MeshBuilder, IMesh *pMesh, IMaterial *pMaterial ); + +public: + // NJS: Messy, but needed for an externally optimized routine to set up the lighting. 
+ void R_InitLightEffectsWorld3(); + void (FASTCALL *R_LightEffectsWorld3)( const LightDesc_t *pLightDesc, const lightpos_t *light, const Vector& normal, Vector &dest ); + +private: + inline float R_WorldLightAngle( const LightDesc_t *wl, const Vector& lnormal, const Vector& snormal, const Vector& delta ); + + void InitDebugMaterials( void ); + void ShutdownDebugMaterials( void ); + int SortMeshes( int* pIndices, IMaterial **ppMaterials, short* pskinref, const Vector& vforward, const Vector& r_origin ); + + // Computes pose to decal space transforms for decal creation + // returns false if it can't for some reason. + bool ComputePoseToDecal( Ray_t const& ray, const Vector& up ); + + bool AddDecalToModel( DecalBuildInfo_t& buildInfo ); + + // Helper methods for decal projection, projects pose space vertex data + bool TransformToDecalSpace( DecalBuildInfo_t& build, const Vector& pos, mstudioboneweight_t *pboneweight, Vector2D& uv ); + bool ProjectDecalOntoMesh( DecalBuildInfo_t& build, DecalBuildVertexInfo_t* pVertexInfo, mstudiomesh_t *pMesh ); + bool IsFrontFacing( const Vector * norm, const mstudioboneweight_t *pboneweight ); + int ComputeClipFlags( DecalBuildVertexInfo_t* pVertexInfo, int i ); + void ConvertMeshVertexToDecalVertex( DecalBuildInfo_t& build, int meshIndex, DecalVertex_t& decalVertex, int nGroupIndex = 0xFFFF ); + unsigned short AddVertexToDecal( DecalBuildInfo_t& build, int meshIndex, int nGroupIndex = 0xFFFF ); + unsigned short AddVertexToDecal( DecalBuildInfo_t& build, DecalVertex_t& vert ); + void AddClippedDecalToTriangle( DecalBuildInfo_t& build, DecalClipState_t& clipState ); + bool ClipDecal( DecalBuildInfo_t& build, int i1, int i2, int i3, int *pClipFlags ); + void AddTriangleToDecal( DecalBuildInfo_t& build, int i1, int i2, int i3, int gi1, int gi2, int gi3 ); + void AddDecalToMesh( DecalBuildInfo_t& build ); + int GetDecalMaterial( DecalLod_t& decalLod, IMaterial* pDecalMaterial ); + int AddDecalToMaterialList( DecalMaterial_t* pMaterial 
); + + // Total number of meshes we have to deal with + int ComputeTotalMeshCount( int iRootLOD, int iMaxLOD, int body ) const; + + // Project decals onto all meshes + void ProjectDecalsOntoMeshes( DecalBuildInfo_t& build, int nMeshCount ); + + // Set up the locations for vertices to use + int ComputeVertexAllocation( int iMaxLOD, int body, studiohwdata_t *pHardwareData, MeshVertexInfo_t *pVertexInfo ); + + // Removes a decal and associated vertices + indices from the history list + void RetireDecal( DecalModelList_t &list, DecalId_t nDecalID, int iLOD, int iMaxLOD ); + + // Helper methods related to drawing decals + void DrawSingleBoneDecals( CMeshBuilder& meshBuilder, DecalMaterial_t& decalMaterial ); + bool DrawMultiBoneDecals( CMeshBuilder& meshBuilder, DecalMaterial_t& decalMaterial, studiohdr_t *pStudioHdr ); + void DrawSingleBoneFlexedDecals( IMatRenderContext *pRenderContext, CMeshBuilder& meshBuilder, DecalMaterial_t& decalMaterial ); + bool DrawMultiBoneFlexedDecals( IMatRenderContext *pRenderContext, CMeshBuilder& meshBuilder, DecalMaterial_t& decalMaterial, studiohdr_t *pStudioHdr, studioloddata_t *pStudioLOD ); + void DrawDecalMaterial( IMatRenderContext *pRenderContext, DecalMaterial_t& decalMaterial, studiohdr_t *pStudioHdr, studioloddata_t *pStudioLOD ); + void DrawDecal( const DrawModelInfo_t &drawInfo, int lod, int body ); + bool PreDrawDecal( IMatRenderContext *pRenderContext, const DrawModelInfo_t &drawInfo ); + + // Draw shadows + void DrawShadows( const DrawModelInfo_t& info, int flags, int boneMask ); + + // Draw flashlight lighting on decals. 
+ void DrawFlashlightDecals( const DrawModelInfo_t& info, int lod ); + + // Helper methods related to extracting and balancing + float RampFlexWeight( mstudioflex_t &flex, float w ); + + // Remove decal from LRU + void RemoveDecalListFromLRU( StudioDecalHandle_t h ); + + // Helper methods related to flexing vertices + void R_StudioFlexVerts( mstudiomesh_t *pmesh, int lod ); + + // Flex stats + void GetFlexStats( ); + + // Sets up the hw flex mesh + void ComputeFlexWeights( int nFlexCount, mstudioflex_t *pFlex, MorphWeight_t *pWeights ); + + // Generate morph accumulator + void GenerateMorphAccumulator( mstudiomodel_t *pSubModel ); + + // Computes eyeball state + void ComputeEyeballState( mstudiomodel_t *pSubModel ); + + // Avoid some warnings... + CStudioRender( CStudioRender const& ); + +public: + // Render context (comes from queue) + StudioRenderContext_t *m_pRC; + +private: + // Stores all decals for a particular material and lod + CUtlLinkedList< DecalMaterial_t, unsigned short, true > m_DecalMaterial; + + // Stores all decal lists that have been made + CUtlFixedLinkedList< DecalModelList_t > m_DecalList; + CThreadFastMutex m_DecalMutex; + + // Stores all shadows to be cast on the current object + CUtlVector<ShadowState_t> m_ShadowState; + + matrix3x4_t m_StaticPropRootToWorld; + matrix3x4_t *m_pBoneToWorld; // bone transformation matrix( comes from queue ) + + matrix3x4_t *m_PoseToWorld; // bone transformation matrix + matrix3x4_t *m_PoseToDecal; // bone transformation matrix + + // Flex state, comes from queue + float *m_pFlexWeights; + float *m_pFlexDelayedWeights; + + studiohdr_t *m_pStudioHdr; + mstudiomodel_t *m_pSubModel; + studiomeshdata_t *m_pStudioMeshes; + + eyeballstate_t m_pEyeballState[16]; // MAXSTUDIOEYEBALLS + + // debug materials + IMaterial *m_pMaterialMRMWireframe; + IMaterial *m_pMaterialMRMWireframeZBuffer; + IMaterial *m_pMaterialMRMNormals; + IMaterial *m_pMaterialTangentFrame; + IMaterial *m_pMaterialTranslucentModelHulls; + IMaterial 
*m_pMaterialSolidModelHulls; + IMaterial *m_pMaterialAdditiveVertexColorVertexAlpha; + IMaterial *m_pMaterialModelBones; + IMaterial *m_pMaterialWorldWireframe; + IMaterial *m_pMaterialModelEnvCubemap; + + // Depth override material + IMaterial *m_pDepthWrite[2][2]; + IMaterial *m_pSSAODepthWrite[2][2]; + + // GLINT data + ITexture* m_pGlintTexture; + ITexture* m_pGlintLODTexture; + IMaterial *m_pGlintBuildMaterial; + short m_GlintWidth; + short m_GlintHeight; + + // Flex data + CCachedRenderData m_VertexCache; + + // Cached variables: + bool m_bSkippedMeshes : 1; + bool m_bDrawTranslucentSubModels : 1; + + DecalId_t m_nDecalId; + CUtlFixedLinkedList< DecalLRU_t > m_DecalLRU; + + friend class CGlintTextureRegenerator; + friend struct mstudiomodel_t; + friend class CStudioRenderContext; +}; + + +//----------------------------------------------------------------------------- +// Converts matrices to a format material system wants +//----------------------------------------------------------------------------- + +/* +================ +R_StudioTransform +================ +*/ +inline void CStudioRender::R_StudioTransform( Vector& in1, mstudioboneweight_t *pboneweight, Vector& out1 ) +{ +// MEASURECODE( "R_StudioTransform" ); + + Vector out2; + switch( pboneweight->numbones ) + { + case 1: + VectorTransform( in1, m_PoseToWorld[(unsigned)pboneweight->bone[0]], out1 ); + break; +/* + case 2: + VectorTransform( in1, m_PoseToWorld[pboneweight->bone[0]], out1 ); + out1 *= pboneweight->weight[0]; + VectorTransform( in1, m_PoseToWorld[pboneweight->bone[1]], out2 ); + VectorMA( out1, pboneweight->weight[1], out2, out1 ); + break; + + case 3: + VectorTransform( in1, m_PoseToWorld[pboneweight->bone[0]], out1 ); + out1 *= pboneweight->weight[0]; + VectorTransform( in1, m_PoseToWorld[pboneweight->bone[1]], out2 ); + VectorMA( out1, pboneweight->weight[1], out2, out1 ); + VectorTransform( in1, m_PoseToWorld[pboneweight->bone[2]], out2 ); + VectorMA( out1, pboneweight->weight[2], 
out2, out1 ); + break; +*/ + default: + VectorFill( out1, 0 ); + for (int i = 0; i < pboneweight->numbones; i++) + { + VectorTransform( in1, m_PoseToWorld[(unsigned)pboneweight->bone[i]], out2 ); + VectorMA( out1, pboneweight->weight[i], out2, out1 ); + } + break; + } +} + + +/* +================ +R_StudioRotate +================ +*/ +inline void CStudioRender::R_StudioRotate( Vector& in1, mstudioboneweight_t *pboneweight, Vector& out1 ) +{ + // NOTE: This only works to rotate normals if there's no scale in the + // pose to world transforms. If we ever add scale, we'll need to + // multiply by the inverse transpose of the pose to world + + if (pboneweight->numbones == 1) + { + VectorRotate( in1, m_PoseToWorld[(unsigned)pboneweight->bone[0]], out1 ); + } + else + { + Vector out2; + + VectorFill( out1, 0 ); + + for (int i = 0; i < pboneweight->numbones; i++) + { + VectorRotate( in1, m_PoseToWorld[(unsigned)pboneweight->bone[i]], out2 ); + VectorMA( out1, pboneweight->weight[i], out2, out1 ); + } + VectorNormalize( out1 ); + } +} + +inline void CStudioRender::R_StudioRotate( Vector4D& realIn1, mstudioboneweight_t *pboneweight, Vector4D& realOut1 ) +{ + // garymcthack - god this sucks. 
+ Vector in1( realIn1[0], realIn1[1], realIn1[2] ); + Vector out1; + if (pboneweight->numbones == 1) + { + VectorRotate( in1, m_PoseToWorld[(unsigned)pboneweight->bone[0]], out1 ); + } + else + { + Vector out2; + + VectorFill( out1, 0 ); + + for (int i = 0; i < pboneweight->numbones; i++) + { + VectorRotate( in1, m_PoseToWorld[(unsigned)pboneweight->bone[i]], out2 ); + VectorMA( out1, pboneweight->weight[i], out2, out1 ); + } + VectorNormalize( out1 ); + } + realOut1.Init( out1[0], out1[1], out1[2], realIn1[3] ); +} + + +//----------------------------------------------------------------------------- +// Compute the contribution of a light depending on it's angle +//----------------------------------------------------------------------------- +/* + light_normal (lights normal translated to same space as other normals) + surface_normal + light_direction_normal | (light_pos - vertex_pos) | +*/ + +template< int nLightType > +class CWorldLightAngleWrapper +{ +public: + FORCEINLINE static float WorldLightAngle( const LightDesc_t *wl, const Vector& lnormal, const Vector& snormal, const Vector& delta ) + { + float dot, dot2, ratio; + + switch (nLightType) + { + case MATERIAL_LIGHT_POINT: +#if 1 + // half-lambert + dot = DotProduct( snormal, delta ); + if (dot < 0.f) + return 0.f; +#else + dot = DotProduct( snormal, delta ) * 0.5 + 0.5; + dot = dot * dot; +#endif + return dot; + + case MATERIAL_LIGHT_SPOT: +#if 1 + // half-lambert + dot = DotProduct( snormal, delta ); + if (dot < 0.) 
+ return 0.f; +#else + dot = DotProduct( snormal, delta ) * 0.5 + 0.5; + dot = dot * dot; +#endif + + dot2 = -DotProduct (delta, lnormal); + if (dot2 <= wl->m_PhiDot) + return 0.f; // outside light cone + + ratio = dot; + if (dot2 >= wl->m_ThetaDot) + return ratio; // inside inner cone + + if ((wl->m_Falloff == 1.f) || (wl->m_Falloff == 0.f)) + { + ratio *= (dot2 - wl->m_PhiDot) / (wl->m_ThetaDot - wl->m_PhiDot); + } + else + { + ratio *= pow((dot2 - wl->m_PhiDot) / (wl->m_ThetaDot - wl->m_PhiDot), wl->m_Falloff ); + } + return ratio; + + case MATERIAL_LIGHT_DIRECTIONAL: +#if 1 + // half-lambert + dot2 = -DotProduct( snormal, lnormal ); + if (dot2 < 0.f) + return 0.f; +#else + dot2 = -DotProduct( snormal, lnormal ) * 0.5 + 0.5; + dot2 = dot2 * dot2; +#endif + return dot2; + + case MATERIAL_LIGHT_DISABLE: + return 0.f; + + NO_DEFAULT; + } + } +}; + +template< int nLightType > +class CWorldLightAngleWrapperConstDirectional +{ +public: + FORCEINLINE static float WorldLightAngle( const LightDesc_t *wl, const Vector& lnormal, const Vector& snormal, const Vector& delta, float directionalamount ) + { + float dot, dot2, ratio; + + // directional amount is constant + dot = directionalamount; + if (dot < 0.f) + return 0.f; + + switch (nLightType) + { + case MATERIAL_LIGHT_POINT: + case MATERIAL_LIGHT_DIRECTIONAL: + return dot; + + case MATERIAL_LIGHT_SPOT: + dot2 = -DotProduct (delta, lnormal); + if (dot2 <= wl->m_PhiDot) + return 0.f; // outside light cone + + ratio = dot; + if (dot2 >= wl->m_ThetaDot) + return ratio; // inside inner cone + + if ((wl->m_Falloff == 1.f) || (wl->m_Falloff == 0.f)) + { + ratio *= (dot2 - wl->m_PhiDot) / (wl->m_ThetaDot - wl->m_PhiDot); + } + else + { + ratio *= pow((dot2 - wl->m_PhiDot) / (wl->m_ThetaDot - wl->m_PhiDot), wl->m_Falloff ); + } + return ratio; + + case MATERIAL_LIGHT_DISABLE: + return 0.f; + + NO_DEFAULT; + } + } +}; + +inline float CStudioRender::R_WorldLightAngle( const LightDesc_t *wl, const Vector& lnormal, const Vector& 
snormal, const Vector& delta ) +{ + switch (wl->m_Type) + { + case MATERIAL_LIGHT_DISABLE: return CWorldLightAngleWrapper<MATERIAL_LIGHT_DISABLE>::WorldLightAngle( wl, lnormal, snormal, delta ); + case MATERIAL_LIGHT_POINT: return CWorldLightAngleWrapper<MATERIAL_LIGHT_POINT>::WorldLightAngle( wl, lnormal, snormal, delta ); + case MATERIAL_LIGHT_DIRECTIONAL: return CWorldLightAngleWrapper<MATERIAL_LIGHT_DIRECTIONAL>::WorldLightAngle( wl, lnormal, snormal, delta ); + case MATERIAL_LIGHT_SPOT: return CWorldLightAngleWrapper<MATERIAL_LIGHT_SPOT>::WorldLightAngle( wl, lnormal, snormal, delta ); + NO_DEFAULT; + } +} + + +//----------------------------------------------------------------------------- +// Draws eyeballs +//----------------------------------------------------------------------------- +inline void CStudioRender::R_StudioEyeballNormal( mstudioeyeball_t const* peyeball, Vector& org, + Vector& pos, Vector& normal ) +{ + // inside of a flattened torus + VectorSubtract( pos, org, normal ); + float flUpAmount = DotProduct( normal, peyeball->up ); + VectorMA( normal, -0.5 * flUpAmount, peyeball->up, normal ); + VectorNormalize( normal ); +} + + +//----------------------------------------------------------------------------- +// +// Stateless utility methods +// +//----------------------------------------------------------------------------- + +// Computes the submodel for a specified body + bodypart +int R_StudioSetupModel( int nBodyPart, int nBody, mstudiomodel_t **pSubModel, const studiohdr_t *pStudioHdr ); + +// Computes PoseToWorld from BoneToWorld +void ComputePoseToWorld( matrix3x4_t *pPoseToWorld, studiohdr_t *pStudioHdr, int boneMask, const Vector& vecViewOrigin, const matrix3x4_t *pBoneToWorld ); + +// Computes the model LOD +inline int ComputeModelLODAndMetric( studiohwdata_t *pHardwareData, float flUnitSphereSize, float *pMetric ) +{ + // NOTE: This function was split off since CStudioRender needs it also. 
+ float flMetric = pHardwareData->LODMetric( flUnitSphereSize ); + if ( pMetric ) + { + *pMetric = flMetric; + } + return pHardwareData->GetLODForMetric( flMetric ); +} + + + +#endif // CSTUDIORENDER_H diff --git a/studiorender/studiorender.vpc b/studiorender/studiorender.vpc new file mode 100644 index 0000000..84fc607 --- /dev/null +++ b/studiorender/studiorender.vpc @@ -0,0 +1,114 @@ +//----------------------------------------------------------------------------- +// STUDIORENDER.VPC +// +// Project Script +//----------------------------------------------------------------------------- + +$macro SRCDIR ".." +$Macro OUTBINDIR "$SRCDIR\..\game\bin" + +$include "$SRCDIR\vpc_scripts\source_dll_base.vpc" + +$Configuration +{ + $Linker + { + $SystemLibraries "iconv" [$OSXALL] + } + $Compiler + { + $PreprocessorDefinitions "$BASE;STUDIORENDER_EXPORTS;PROTECTED_THINGS_ENABLE" + $PreprocessorDefinitions "$BASE;fopen=dont_use_fopen" [$WIN32] + } +} + +$Project "StudioRender" +{ + $Folder "Source Files" + { + $File "studiorender.cpp" + $File "studiorendercontext.cpp" + $File "flexrenderdata.cpp" + $File "r_studio.cpp" + $File "r_studiodecal.cpp" + $File "r_studiodraw.cpp" + $File "r_studiodraw_computeflexedvertex.cpp" + $File "r_studioflex.cpp" + $File "r_studiogettriangles.cpp" + $File "r_studiolight.cpp" + $File "r_studiostats.cpp" + } + + $Folder "Header Files" + { + $File "r_studiolight.h" + $File "studiorender.h" + $File "studiorendercontext.h" + $File "flexrenderdata.h" + } + + $Folder "Public Header Files" + { + $File "$SRCDIR\public\mathlib\amd3dx.h" + $File "$SRCDIR\public\basehandle.h" + $File "$SRCDIR\public\tier0\basetypes.h" + $File "$SRCDIR\public\bspflags.h" + $File "$SRCDIR\public\clientstats.h" + $File "$SRCDIR\public\cmodel.h" + $File "$SRCDIR\public\tier0\commonmacros.h" + $File "$SRCDIR\public\mathlib\compressed_vector.h" + $File "$SRCDIR\public\const.h" + $File "$SRCDIR\public\tier1\convar.h" + $File "$SRCDIR\public\tier0\dbg.h" + $File 
"$SRCDIR\public\tier0\fasttimer.h" + $File "$SRCDIR\public\gametrace.h" + $File "$SRCDIR\public\appframework\IAppSystem.h" + $File "$SRCDIR\public\tier0\icommandline.h" + $File "$SRCDIR\public\ihandleentity.h" + $File "$SRCDIR\public\materialsystem\imaterial.h" + $File "$SRCDIR\public\materialsystem\imaterialsystem.h" + $File "$SRCDIR\public\materialsystem\imaterialsystemhardwareconfig.h" + $File "$SRCDIR\public\materialsystem\imaterialvar.h" + $File "$SRCDIR\public\materialsystem\imesh.h" + $File "$SRCDIR\public\tier1\interface.h" + $File "$SRCDIR\public\istudiorender.h" + $File "$SRCDIR\public\materialsystem\itexture.h" + $File "$SRCDIR\public\mathlib\mathlib.h" + $File "$SRCDIR\public\measure_section.h" + $File "$SRCDIR\public\tier0\mem.h" + $File "$SRCDIR\public\tier0\memalloc.h" + $File "$SRCDIR\public\tier0\memdbgoff.h" + $File "$SRCDIR\public\tier0\memdbgon.h" + $File "$SRCDIR\public\model_types.h" + $File "$SRCDIR\public\optimize.h" + $File "$SRCDIR\public\pixelwriter.h" + $File "$SRCDIR\public\tier0\platform.h" + $File "$SRCDIR\public\tier0\protected_things.h" + $File "$SRCDIR\public\string_t.h" + $File "$SRCDIR\public\tier1\strtools.h" + $File "$SRCDIR\public\studio.h" + $File "$SRCDIR\public\tier1\utlbuffer.h" + $File "$SRCDIR\public\tier1\utllinkedlist.h" + $File "$SRCDIR\public\tier1\utlmemory.h" + $File "$SRCDIR\public\tier1\utlvector.h" + $File "$SRCDIR\public\vcollide.h" + $File "$SRCDIR\public\mathlib\vector.h" + $File "$SRCDIR\public\mathlib\vector2d.h" + $File "$SRCDIR\public\mathlib\vector4d.h" + $File "$SRCDIR\public\mathlib\vmatrix.h" + $File "$SRCDIR\public\mathlib\vplane.h" + $File "$SRCDIR\public\tier0\vprof.h" + $File "$SRCDIR\public\vstdlib\vstdlib.h" + $File "$SRCDIR\public\vtf\vtf.h" + $File "$SRCDIR\public\tier1\UtlStringMap.h" + } + + $folder "Link Libraries" + { + $Lib bitmap + $Lib mathlib + $Lib tier2 + $Lib tier3 + } + +} diff --git a/studiorender/studiorendercontext.cpp b/studiorender/studiorendercontext.cpp new file mode 100644 
index 0000000..2d857da --- /dev/null +++ b/studiorender/studiorendercontext.cpp @@ -0,0 +1,2454 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//===========================================================================// + +#include <stdlib.h> +#include "tier0/platform.h" +#include "studiorendercontext.h" +#include "optimize.h" +#include "materialsystem/imaterialvar.h" +#include "materialsystem/imesh.h" +#include "materialsystem/imorph.h" +#include "materialsystem/ivballoctracker.h" +#include "vstdlib/random.h" +#include "tier0/tslist.h" +#include "tier0/platform.h" +#include "tier1/refcount.h" +#include "tier1/callqueue.h" +#include "cmodel.h" +#include "tier0/vprof.h" + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + + +// garymcthack - this should go elsewhere +#define MAX_NUM_BONE_INDICES 4 + + +//----------------------------------------------------------------------------- +// Toggles studio queued mode +//----------------------------------------------------------------------------- +void StudioChangeCallback( IConVar *var, const char *pOldValue, float flOldValue ) +{ + // NOTE: This is necessary to flush the queued thread when this value changes + MaterialLock_t hLock = g_pMaterialSystem->Lock(); + g_pMaterialSystem->Unlock( hLock ); +} + +static ConVar studio_queue_mode( "studio_queue_mode", "1", 0, "", StudioChangeCallback ); + + +//----------------------------------------------------------------------------- +// Globals +//----------------------------------------------------------------------------- +static float s_pZeroFlexWeights[MAXSTUDIOFLEXDESC]; + + +//----------------------------------------------------------------------------- +// Singleton instance +//----------------------------------------------------------------------------- +IStudioDataCache *g_pStudioDataCache = NULL; +static CStudioRenderContext s_StudioRenderContext; 
+EXPOSE_SINGLE_INTERFACE_GLOBALVAR( CStudioRenderContext, IStudioRender, + STUDIO_RENDER_INTERFACE_VERSION, s_StudioRenderContext ); + + +//----------------------------------------------------------------------------- +// Constructor, destructor +//----------------------------------------------------------------------------- +CStudioRenderContext::CStudioRenderContext() +{ + // Initialize render context + m_RC.m_pForcedMaterial = NULL; + m_RC.m_nForcedMaterialType = OVERRIDE_NORMAL; + m_RC.m_ColorMod[0] = m_RC.m_ColorMod[1] = m_RC.m_ColorMod[2] = 1.0f; + m_RC.m_AlphaMod = 1.0f; + m_RC.m_ViewOrigin.Init(); + m_RC.m_ViewRight.Init(); + m_RC.m_ViewUp.Init(); + m_RC.m_ViewPlaneNormal.Init(); + m_RC.m_Config.m_bEnableHWMorph = true; + m_RC.m_Config.m_bStatsMode = false; + + m_RC.m_NumLocalLights = 0; + for ( int i = 0; i < 6; ++i ) + { + m_RC.m_LightBoxColors[i].Init( 0, 0, 0 ); + } +} + +CStudioRenderContext::~CStudioRenderContext() +{ +} + + +//----------------------------------------------------------------------------- +// Connect, disconnect +//----------------------------------------------------------------------------- +bool CStudioRenderContext::Connect( CreateInterfaceFn factory ) +{ + if ( !BaseClass::Connect( factory ) ) + return false; + + g_pStudioDataCache = ( IStudioDataCache * )factory( STUDIO_DATA_CACHE_INTERFACE_VERSION, NULL ); + if ( !g_pMaterialSystem || !g_pMaterialSystemHardwareConfig || !g_pStudioDataCache ) + { + Msg("StudioRender failed to connect to a required system\n" ); + } + return ( g_pMaterialSystem && g_pMaterialSystemHardwareConfig && g_pStudioDataCache ); +} + +void CStudioRenderContext::Disconnect() +{ + g_pStudioDataCache = NULL; + BaseClass::Disconnect(); +} + + +//----------------------------------------------------------------------------- +// Here's where systems can access other interfaces implemented by this object +// Returns NULL if it doesn't implement the requested interface 
+//----------------------------------------------------------------------------- +void *CStudioRenderContext::QueryInterface( const char *pInterfaceName ) +{ + // Loading the studiorender DLL mounts *all* interfaces + CreateInterfaceFn factory = Sys_GetFactoryThis(); // This silly construction is necessary + return factory( pInterfaceName, NULL ); // to prevent the LTCG compiler from crashing. +} + + +//----------------------------------------------------------------------------- +// Init, shutdown +//----------------------------------------------------------------------------- +InitReturnVal_t CStudioRenderContext::Init() +{ + MathLib_Init( 2.2f, 2.2f, 0.0f, 2.0f ); + + InitReturnVal_t nRetVal = BaseClass::Init(); + if ( nRetVal != INIT_OK ) + return nRetVal; + + if( !g_pMaterialSystem || !g_pMaterialSystemHardwareConfig ) + return INIT_FAILED; + + return g_pStudioRenderImp->Init(); +} + +void CStudioRenderContext::Shutdown( void ) +{ + g_pStudioRenderImp->Shutdown(); + BaseClass::Shutdown(); +} + + +//----------------------------------------------------------------------------- +// Used to activate the stub material system. 
+//----------------------------------------------------------------------------- +void CStudioRenderContext::Mat_Stub( IMaterialSystem *pMatSys ) +{ + g_pMaterialSystem = pMatSys; +} + + +//----------------------------------------------------------------------------- +// Determines material flags +//----------------------------------------------------------------------------- +void CStudioRenderContext::ComputeMaterialFlags( studiohdr_t *phdr, studioloddata_t &lodData, IMaterial *pMaterial ) +{ + // requesting info forces the initial material precache (and its build out) + if ( pMaterial->UsesEnvCubemap() ) + { + phdr->flags |= STUDIOHDR_FLAGS_USES_ENV_CUBEMAP; + } + if ( pMaterial->NeedsPowerOfTwoFrameBufferTexture( false ) ) // The false checks if it will ever need the frame buffer, not just this frame + { + phdr->flags |= STUDIOHDR_FLAGS_USES_FB_TEXTURE; + } + + // FIXME: I'd rather know that the material is definitely using the bumpmap. + // It could be in the file without actually being used. + static unsigned int bumpvarCache = 0; + IMaterialVar *pBumpMatVar = pMaterial->FindVarFast( "$bumpmap", &bumpvarCache ); + if ( pBumpMatVar && pBumpMatVar->IsDefined() && pMaterial->NeedsTangentSpace() ) + { + phdr->flags |= STUDIOHDR_FLAGS_USES_BUMPMAPPING; + } + + // Make sure material is treated as bump mapped if phong is set + static unsigned int phongVarCache = 0; + IMaterialVar *pPhongMatVar = pMaterial->FindVarFast( "$phong", &phongVarCache ); + if ( pPhongMatVar && pPhongMatVar->IsDefined() && ( pPhongMatVar->GetIntValue() != 0 ) ) + { + phdr->flags |= STUDIOHDR_FLAGS_USES_BUMPMAPPING; + } +} + + +//----------------------------------------------------------------------------- +// Does this material use a mouth shader? 
+//----------------------------------------------------------------------------- +static bool UsesMouthShader( IMaterial *pMaterial ) +{ + // FIXME: hack, needs proper client side material system interface + static unsigned int clientShaderCache = 0; + IMaterialVar *clientShaderVar = pMaterial->FindVarFast( "$clientShader", &clientShaderCache ); + if ( clientShaderVar ) + return ( Q_stricmp( clientShaderVar->GetStringValue(), "MouthShader" ) == 0 ); + return false; +} + + +//----------------------------------------------------------------------------- +// Returns the actual texture name to use on the model +//----------------------------------------------------------------------------- +static const char *GetTextureName( studiohdr_t *phdr, OptimizedModel::FileHeader_t *pVtxHeader, + int lodID, int inMaterialID ) +{ + OptimizedModel::MaterialReplacementListHeader_t *materialReplacementList = + pVtxHeader->pMaterialReplacementList( lodID ); + int i; + for( i = 0; i < materialReplacementList->numReplacements; i++ ) + { + OptimizedModel::MaterialReplacementHeader_t *materialReplacement = + materialReplacementList->pMaterialReplacement( i ); + if( materialReplacement->materialID == inMaterialID ) + { + const char *str = materialReplacement->pMaterialReplacementName(); + return str; + } + } + return phdr->pTexture( inMaterialID )->pszName(); +} + + +//----------------------------------------------------------------------------- +// Loads materials associated with a particular LOD of a model +//----------------------------------------------------------------------------- +void CStudioRenderContext::LoadMaterials( studiohdr_t *phdr, + OptimizedModel::FileHeader_t *pVtxHeader, studioloddata_t &lodData, int lodID ) +{ + typedef IMaterial *IMaterialPtr; + Assert( phdr ); + + lodData.numMaterials = phdr->numtextures; + if ( lodData.numMaterials == 0 ) + { + lodData.ppMaterials = NULL; + return; + } + + lodData.ppMaterials = new IMaterialPtr[lodData.numMaterials]; + Assert( 
lodData.ppMaterials ); + + lodData.pMaterialFlags = new int[lodData.numMaterials]; + Assert( lodData.pMaterialFlags ); + + int i, j; + + // get index of each material + // set the runtime studiohdr flags that are material derived + if ( phdr->textureindex == 0 ) + return; + + for ( i = 0; i < phdr->numtextures; i++ ) + { + char szPath[MAX_PATH]; + IMaterial *pMaterial = NULL; + + // search through all specified directories until a valid material is found + for ( j = 0; j < phdr->numcdtextures && IsErrorMaterial( pMaterial ); j++ ) + { + // If we don't do this, we get filenames like "materials\\blah.vmt". + const char *textureName = GetTextureName( phdr, pVtxHeader, lodID, i ); + if ( textureName[0] == CORRECT_PATH_SEPARATOR || textureName[0] == INCORRECT_PATH_SEPARATOR ) + ++textureName; + + // This prevents filenames like /models/blah.vmt. + const char *pCdTexture = phdr->pCdtexture( j ); + if ( pCdTexture[0] == CORRECT_PATH_SEPARATOR || pCdTexture[0] == INCORRECT_PATH_SEPARATOR ) + ++pCdTexture; + + V_ComposeFileName( pCdTexture, textureName, szPath, sizeof( szPath ) ); + + if ( phdr->flags & STUDIOHDR_FLAGS_OBSOLETE ) + { + pMaterial = g_pMaterialSystem->FindMaterial( "models/obsolete/obsolete", TEXTURE_GROUP_MODEL, false ); + if ( IsErrorMaterial( pMaterial ) ) + { + Warning( "StudioRender: OBSOLETE material missing: \"models/obsolete/obsolete\"\n" ); + } + } + else + { + pMaterial = g_pMaterialSystem->FindMaterial( szPath, TEXTURE_GROUP_MODEL, false ); + } + } + if ( IsErrorMaterial( pMaterial ) ) + { + // hack - if it isn't found, go through the motions of looking for it again + // so that the materialsystem will give an error. 
+ char szPrefix[256]; + Q_strncpy( szPrefix, phdr->pszName(), sizeof( szPrefix ) ); + Q_strncat( szPrefix, " : ", sizeof( szPrefix ), COPY_ALL_CHARACTERS ); + for ( j = 0; j < phdr->numcdtextures; j++ ) + { + Q_strncpy( szPath, phdr->pCdtexture( j ), sizeof( szPath ) ); + const char *textureName = GetTextureName( phdr, pVtxHeader, lodID, i ); + Q_strncat( szPath, textureName, sizeof( szPath ), COPY_ALL_CHARACTERS ); + Q_FixSlashes( szPath, CORRECT_PATH_SEPARATOR ); + g_pMaterialSystem->FindMaterial( szPath, TEXTURE_GROUP_MODEL, true, szPrefix ); + } + } + + lodData.ppMaterials[i] = pMaterial; + if ( pMaterial ) + { + // Increment the reference count for the material. + pMaterial->IncrementReferenceCount(); + ComputeMaterialFlags( phdr, lodData, pMaterial ); + lodData.pMaterialFlags[i] = UsesMouthShader( pMaterial ) ? 1 : 0; + } + } +} + + +//----------------------------------------------------------------------------- +// Suppresses all hw morphs on a model +//----------------------------------------------------------------------------- +static void SuppressAllHWMorphs( mstudiomodel_t *pModel, OptimizedModel::ModelLODHeader_t *pVtxLOD ) +{ + for ( int k = 0; k < pModel->nummeshes; ++k ) + { + OptimizedModel::MeshHeader_t* pVtxMesh = pVtxLOD->pMesh(k); + for (int i = 0; i < pVtxMesh->numStripGroups; ++i ) + { + OptimizedModel::StripGroupHeader_t* pStripGroup = pVtxMesh->pStripGroup(i); + if ( ( pStripGroup->flags & OptimizedModel::STRIPGROUP_IS_DELTA_FLEXED ) ) + { + pStripGroup->flags |= OptimizedModel::STRIPGROUP_SUPPRESS_HW_MORPH; + } + } + } +} + + +//----------------------------------------------------------------------------- +// Computes the total flexes on a model +//----------------------------------------------------------------------------- +static int ComputeTotalFlexCount( mstudiomodel_t *pModel ) +{ + int nFlexCount = 0; + for ( int k = 0; k < pModel->nummeshes; ++k ) + { + mstudiomesh_t* pMesh = pModel->pMesh(k); + nFlexCount += pMesh->numflexes; + } 
+ return nFlexCount; +} + + +//----------------------------------------------------------------------------- +// Count deltas affecting a particular stripgroup +//----------------------------------------------------------------------------- +int CStudioRenderContext::CountDeltaFlexedStripGroups( mstudiomodel_t *pModel, OptimizedModel::ModelLODHeader_t *pVtxLOD ) +{ + int nFlexedStripGroupCount = 0; + for ( int k = 0; k < pModel->nummeshes; ++k ) + { + Assert( pModel->nummeshes == pVtxLOD->numMeshes ); + OptimizedModel::MeshHeader_t* pVtxMesh = pVtxLOD->pMesh(k); + for (int i = 0; i < pVtxMesh->numStripGroups; ++i ) + { + OptimizedModel::StripGroupHeader_t* pStripGroup = pVtxMesh->pStripGroup(i); + if ( ( pStripGroup->flags & OptimizedModel::STRIPGROUP_IS_DELTA_FLEXED ) == 0 ) + continue; + ++nFlexedStripGroupCount; + } + } + return nFlexedStripGroupCount; +} + + +//----------------------------------------------------------------------------- +// Count vertices affected by deltas in a particular strip group +//----------------------------------------------------------------------------- +int CStudioRenderContext::CountFlexedVertices( mstudiomesh_t* pMesh, OptimizedModel::StripGroupHeader_t* pStripGroup ) +{ + if ( !pMesh->numflexes ) + return 0; + + // an inverse mapping from mesh index to strip group index + unsigned short *pMeshIndexToGroupIndex = (unsigned short*)_alloca( pMesh->pModel()->numvertices * sizeof(unsigned short) ); + memset( pMeshIndexToGroupIndex, 0xFF, pMesh->pModel()->numvertices * sizeof(unsigned short) ); + for ( int i = 0; i < pStripGroup->numVerts; ++i ) + { + int nMeshVert = pStripGroup->pVertex(i)->origMeshVertID; + pMeshIndexToGroupIndex[ nMeshVert ] = (unsigned short)i; + } + + int nFlexVertCount = 0; + for ( int i = 0; i < pMesh->numflexes; ++i ) + { + mstudioflex_t *pFlex = pMesh->pFlex( i ); + byte *pVAnim = pFlex->pBaseVertanim(); + int nVAnimSizeBytes = pFlex->VertAnimSizeBytes(); + for ( int j = 0; j < pFlex->numverts; ++j ) + { + 
mstudiovertanim_t *pAnim = (mstudiovertanim_t*)( pVAnim + j * nVAnimSizeBytes ); + int nMeshVert = pAnim->index; + unsigned short nGroupVert = pMeshIndexToGroupIndex[nMeshVert]; + + // In this case, this vertex is not part of this meshgroup. Ignore it. + if ( nGroupVert != 0xFFFF ) + { + // Only count it once + pMeshIndexToGroupIndex[nMeshVert] = 0xFFFF; + ++nFlexVertCount; + } + } + } + + return nFlexVertCount; +} + + +//----------------------------------------------------------------------------- +// Determine if any strip groups shouldn't be morphed +//----------------------------------------------------------------------------- +static int* s_pVertexCount; +static int SortVertCount( const void *arg1, const void *arg2 ) +{ + /* Compare all of both strings: */ + return s_pVertexCount[*( const int* )arg2] - s_pVertexCount[*( const int* )arg1]; +} + +#define MIN_HWMORPH_FLEX_COUNT 200 + +void CStudioRenderContext::DetermineHWMorphing( mstudiomodel_t *pModel, OptimizedModel::ModelLODHeader_t *pVtxLOD ) +{ + if ( !g_pMaterialSystemHardwareConfig->HasFastVertexTextures() ) + return; + + // There is fixed cost to using HW morphing in the form of setting rendertargets. + // Therefore if there is a low chance of there being enough work, then do it in software. + int nTotalFlexCount = ComputeTotalFlexCount( pModel ); + if ( nTotalFlexCount == 0 ) + return; + + if ( nTotalFlexCount < MIN_HWMORPH_FLEX_COUNT ) + { + SuppressAllHWMorphs( pModel, pVtxLOD ); + return; + } + + // If we have less meshes than the most morphs we can do in a batch, we're done. + int nMaxHWMorphBatchCount = g_pMaterialSystemHardwareConfig->MaxHWMorphBatchCount(); + bool bHWMorph = ( pModel->nummeshes <= nMaxHWMorphBatchCount ); + if ( bHWMorph ) + return; + + // If we have less flexed strip groups than the most we can do in a batch, we're done. 
+ int nFlexedStripGroup = CountDeltaFlexedStripGroups( pModel, pVtxLOD ); + if ( nFlexedStripGroup <= nMaxHWMorphBatchCount ) + return; + + // Finally, the expensive method. Do HW morphing on the N most expensive strip groups + + // FIXME: We should do this at studiomdl time? + // Certainly counting the # of flexed vertices can be done at studiomdl time. + int *pVertexCount = (int*)_alloca( nFlexedStripGroup * sizeof(int) ); + int nCount = 0; + for ( int k = 0; k < pModel->nummeshes; ++k ) + { + Assert( pModel->nummeshes == pVtxLOD->numMeshes ); + mstudiomesh_t* pMesh = pModel->pMesh(k); + OptimizedModel::MeshHeader_t* pVtxMesh = pVtxLOD->pMesh(k); + for (int i = 0; i < pVtxMesh->numStripGroups; ++i ) + { + OptimizedModel::StripGroupHeader_t* pStripGroup = pVtxMesh->pStripGroup(i); + if ( ( pStripGroup->flags & OptimizedModel::STRIPGROUP_IS_DELTA_FLEXED ) == 0 ) + continue; + + pVertexCount[nCount++] = CountFlexedVertices( pMesh, pStripGroup ); + } + } + + int *pSortedVertexIndices = (int*)_alloca( nFlexedStripGroup * sizeof(int) ); + for ( int i = 0; i < nFlexedStripGroup; ++i ) + { + pSortedVertexIndices[i] = i; + } + s_pVertexCount = pVertexCount; + qsort( pSortedVertexIndices, nCount, sizeof(int), SortVertCount ); + + bool *pSuppressHWMorph = (bool*)_alloca( nFlexedStripGroup * sizeof(bool) ); + memset( pSuppressHWMorph, 1, nFlexedStripGroup * sizeof(bool) ); + for ( int i = 0; i < nMaxHWMorphBatchCount; ++i ) + { + pSuppressHWMorph[pSortedVertexIndices[i]] = false; + } + + // Bleah. Pretty lame. 
We should change StripGroupHeader_t to store the flex vertex count + int nIndex = 0; + for ( int k = 0; k < pModel->nummeshes; ++k ) + { + Assert( pModel->nummeshes == pVtxLOD->numMeshes ); + OptimizedModel::MeshHeader_t* pVtxMesh = pVtxLOD->pMesh(k); + for (int i = 0; i < pVtxMesh->numStripGroups; ++i ) + { + OptimizedModel::StripGroupHeader_t* pStripGroup = pVtxMesh->pStripGroup(i); + if ( ( pStripGroup->flags & OptimizedModel::STRIPGROUP_IS_DELTA_FLEXED ) == 0 ) + continue; + + if ( pSuppressHWMorph[nIndex] ) + { + pStripGroup->flags |= OptimizedModel::STRIPGROUP_SUPPRESS_HW_MORPH; + } + ++nIndex; + } + } +} + + +//----------------------------------------------------------------------------- +// Adds a vertex to the meshbuilder. Returns false if boneweights did not sum to 1.0 +//----------------------------------------------------------------------------- +template <VertexCompressionType_t T> bool CStudioRenderContext::R_AddVertexToMesh( const char *pModelName, bool bNeedsTangentSpace, CMeshBuilder& meshBuilder, + OptimizedModel::Vertex_t* pVertex, mstudiomesh_t* pMesh, const mstudio_meshvertexdata_t *vertData, bool hwSkin ) +{ + bool bOK = true; + int idx = pVertex->origMeshVertID; + + mstudiovertex_t &vert = *vertData->Vertex( idx ); + + // FIXME: if this ever becomes perf-critical... 
these writes are not in memory-ascending order, + // which hurts since VBs are in write-combined memory (See WriteCombineOrdering_t) + meshBuilder.Position3fv( vert.m_vecPosition.Base() ); + meshBuilder.CompressedNormal3fv<T>( vert.m_vecNormal.Base() ); + /* + if( vert.m_vecNormal.Length() < .9f || vert.m_vecNormal.Length() > 1.1f ) + { + static CUtlStringMap<bool> errorMessages; + if( !errorMessages.Defined( pModelName ) ) + { + errorMessages[pModelName] = true; + Warning( "MODELBUG %s: bad normal\n", pModelName ); + Warning( "\tnormal %0.1f %0.1f %0.1f pos: %0.1f %0.1f %0.1f\n", + vert.m_vecNormal.x, vert.m_vecNormal.y, vert.m_vecNormal.z, + vert.m_vecPosition.x, vert.m_vecPosition.y, vert.m_vecPosition.z ); + } + } + */ + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + + if (vertData->HasTangentData()) + { + /* + if( bNeedsTangentSpace && pModelName && vertData->TangentS( idx ) ) + { + const Vector4D &tangentS = *vertData->TangentS( idx ); + float w = tangentS.w; + if( !( w == 1.0f || w == -1.0f ) ) + { + static CUtlStringMap<bool> errorMessages; + if( !errorMessages.Defined( pModelName ) ) + { + errorMessages[pModelName] = true; + Warning( "MODELBUG %s: bad tangent sign\n", pModelName ); + Warning( "\tsign %0.1f at position %0.1f %0.1f %0.1f\n", + w, vert.m_vecPosition.x, vert.m_vecPosition.y, vert.m_vecPosition.z ); + } + } + + float len = tangentS.AsVector3D().Length(); + if( len < .9f || len > 1.1f ) + { + static CUtlStringMap<bool> errorMessages; + if( !errorMessages.Defined( pModelName ) ) + { + errorMessages[pModelName] = true; + Warning( "MODELBUG %s: bad tangent vector\n", pModelName ); + Warning( "\ttangent: %0.1f %0.1f %0.1f with length %0.1f at position %0.1f %0.1f %0.1f\n", + tangentS.x, tangentS.y, tangentS.z, + len, + vert.m_vecPosition.x, vert.m_vecPosition.y, vert.m_vecPosition.z ); + } + } + + #if 0 + float dot = DotProduct( vert.m_vecNormal, tangentS.AsVector3D() ); + if( dot > .95 || dot < -.95 ) + { + static CUtlStringMap<bool> 
errorMessages; + if( !errorMessages.Defined( pModelName ) ) + { + errorMessages[pModelName] = true; + // this is crashing for some reason. .need to investigate. + Warning( "MODELBUG %s: nearly colinear tangentS (%f %f %f) and normal (%f %f %f) at position %f %f %f Probably have 2 or more texcoords that are the same on a triangle.\n", + pModelName, tangentS.x, tangentS.y, tangentS.y, vert.m_vecNormal.x, vert.m_vecNormal.y, vert.m_vecNormal.z, vert.m_vecPosition.x, vert.m_vecPosition.y, vert.m_vecPosition.z ); + } + } + #endif + } + */ + + // send down tangent S as a 4D userdata vect. + meshBuilder.CompressedUserData<T>( (*vertData->TangentS( idx )).Base() ); + } + + // Just in case we get hooked to a material that wants per-vertex color + meshBuilder.Color4ub( 255, 255, 255, 255 ); + + float boneWeights[ MAX_NUM_BONE_INDICES ]; + if ( hwSkin ) + { + // sum up weights.. + int i; + + // We have to do this because since we're potentially dropping bones + // to get them to fit in hardware, we'll need to renormalize based on + // the actual total. + mstudioboneweight_t *pBoneWeight = vertData->BoneWeights(idx); + + // NOTE: We use pVertex->numbones because that's the number of bones actually influencing this + // vertex. Note that pVertex->numBones is not necessary the *desired* # of bones influencing this + // vertex; we could have collapsed some of those bones out. 
pBoneWeight->numbones stures the desired # + float totalWeight = 0; + for (i = 0; i < pVertex->numBones; ++i) + { + totalWeight += pBoneWeight->weight[pVertex->boneWeightIndex[i]]; + } + + // The only way we should not add up to 1 is if there's more than 3 *desired* bones + // and more than 1 *actual* bone (we can have 0 vertex bones in the case of static props + if ( (pVertex->numBones > 0) && (pBoneWeight->numbones <= 3) && fabs(totalWeight - 1.0f) > 1e-3 ) + { + // force them to re-normalize + bOK = false; + totalWeight = 1.0f; + } + + // Fix up the static prop case + if ( totalWeight == 0.0f ) + { + totalWeight = 1.0f; + } + + float invTotalWeight = 1.0f / totalWeight; + + // It is essential to iterate over all actual bones so that the bone indices + // are set correctly, even though the last bone weight is computed in a shader program + for (i = 0; i < pVertex->numBones; ++i) + { + if ( pVertex->boneID[i] == -1 ) + { + boneWeights[ i ] = 0.0f; + meshBuilder.BoneMatrix( i, BONE_MATRIX_INDEX_INVALID ); + } + else + { + float weight = pBoneWeight->weight[pVertex->boneWeightIndex[i]]; + boneWeights[ i ] = weight * invTotalWeight; + meshBuilder.BoneMatrix( i, pVertex->boneID[i] ); + } + } + for( ; i < MAX_NUM_BONE_INDICES; i++ ) + { + boneWeights[ i ] = 0.0f; + meshBuilder.BoneMatrix( i, BONE_MATRIX_INDEX_INVALID ); + } + } + else + { + for (int i = 0; i < MAX_NUM_BONE_INDICES; ++i) + { + boneWeights[ i ] = (i == 0) ? 
1.0f : 0.0f; + meshBuilder.BoneMatrix( i, BONE_MATRIX_INDEX_INVALID ); + } + } + + // Set all the weights at once (the meshbuilder performs additional, post-compression, normalization): + Assert( pVertex->numBones <= 3 ); + + if ( pVertex->numBones > 0 ) + { + meshBuilder.CompressedBoneWeight3fv<T>( &( boneWeights[ 0 ] ) ); + } + + meshBuilder.AdvanceVertex(); + + return bOK; +} + +// Get (uncompressed) vertex data from a mesh, if available +inline const mstudio_meshvertexdata_t * GetFatVertexData( mstudiomesh_t * pMesh, studiohdr_t * pStudioHdr ) +{ + if ( !pMesh->pModel()->CacheVertexData( pStudioHdr ) ) + { + // not available yet + return NULL; + } + const mstudio_meshvertexdata_t *pVertData = pMesh->GetVertexData( pStudioHdr ); + Assert( pVertData ); + if ( !pVertData ) + { + static unsigned int warnCount = 0; + if ( warnCount++ < 20 ) + Warning( "ERROR: model verts have been compressed, cannot render! (use \"-no_compressed_vvds\")" ); + } + return pVertData; +} + +//----------------------------------------------------------------------------- +// Builds the group +//----------------------------------------------------------------------------- +void CStudioRenderContext::R_StudioBuildMeshGroup( const char *pModelName, bool bNeedsTangentSpace, studiomeshgroup_t* pMeshGroup, + OptimizedModel::StripGroupHeader_t *pStripGroup, mstudiomesh_t* pMesh, + studiohdr_t *pStudioHdr, VertexFormat_t vertexFormat ) +{ + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + // We have to do this here because of skinning; there may be any number of + // materials that are applied to this mesh. 
+ // Copy over all the vertices + indices in this strip group + pMeshGroup->m_pMesh = pRenderContext->CreateStaticMesh( vertexFormat, TEXTURE_GROUP_STATIC_VERTEX_BUFFER_MODELS ); + + VertexCompressionType_t compressionType = CompressionType( vertexFormat ); + + pMeshGroup->m_ColorMeshID = -1; + + bool hwSkin = (pMeshGroup->m_Flags & MESHGROUP_IS_HWSKINNED) != 0; + + // This mesh could have tristrips or trilists in it + CMeshBuilder meshBuilder; + meshBuilder.SetCompressionType( compressionType ); + meshBuilder.Begin( pMeshGroup->m_pMesh, MATERIAL_HETEROGENOUS, + hwSkin ? pStripGroup->numVerts : 0, pStripGroup->numIndices ); + + int i; + bool bBadBoneWeights = false; + if ( hwSkin ) + { + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pMesh, pStudioHdr ); + Assert( vertData ); + + for ( i = 0; i < pStripGroup->numVerts; ++i ) + { + bool success; + switch ( compressionType ) + { + case VERTEX_COMPRESSION_ON: + success = R_AddVertexToMesh<VERTEX_COMPRESSION_ON>( pModelName, bNeedsTangentSpace, meshBuilder, pStripGroup->pVertex(i), pMesh, vertData, hwSkin ); + break; + case VERTEX_COMPRESSION_NONE: + default: + success = R_AddVertexToMesh<VERTEX_COMPRESSION_NONE>( pModelName, bNeedsTangentSpace, meshBuilder, pStripGroup->pVertex(i), pMesh, vertData, hwSkin ); + break; + } + if ( !success ) + { + bBadBoneWeights = true; + } + } + } + + if ( bBadBoneWeights ) + { + mstudiomodel_t* pModel = pMesh->pModel(); + ConMsg( "Bad data found in model \"%s\" (bad bone weights)\n", pModel->pszName() ); + } + + for (i = 0; i < pStripGroup->numIndices; ++i) + { + meshBuilder.Index( *pStripGroup->pIndex(i) ); + meshBuilder.AdvanceIndex(); + } + + meshBuilder.End(); + + // Copy over the strip indices. 
We need access to the indices for decals + pMeshGroup->m_pIndices = new unsigned short[ pStripGroup->numIndices ]; + memcpy( pMeshGroup->m_pIndices, pStripGroup->pIndex(0), + pStripGroup->numIndices * sizeof(unsigned short) ); + + // Compute the number of non-degenerate trianges in each strip group + // for statistics gathering + pMeshGroup->m_pUniqueTris = new int[ pStripGroup->numStrips ]; + for (i = 0; i < pStripGroup->numStrips; ++i ) + { + int numUnique = 0; + if (pStripGroup->pStrip(i)->flags & OptimizedModel::STRIP_IS_TRISTRIP) + { + int last[2] = {-1, -1}; + int curr = pStripGroup->pStrip(i)->indexOffset; + int end = curr + pStripGroup->pStrip(i)->numIndices; + while (curr != end) + { + int idx = *pStripGroup->pIndex(curr); + if (idx != last[0] && idx != last[1] && last[0] != last[1] && last[0] != -1) + ++numUnique; + last[0] = last[1]; + last[1] = idx; + ++curr; + } + } + else + { + numUnique = pStripGroup->pStrip(i)->numIndices / 3; + } + pMeshGroup->m_pUniqueTris[i] = numUnique; + } +} + +//----------------------------------------------------------------------------- +// Builds the group +//----------------------------------------------------------------------------- +void CStudioRenderContext::R_StudioBuildMorph( studiohdr_t *pStudioHdr, + studiomeshgroup_t* pMeshGroup, mstudiomesh_t* pMesh, + OptimizedModel::StripGroupHeader_t *pStripGroup ) +{ + if ( !g_pMaterialSystemHardwareConfig->HasFastVertexTextures() || + ( ( pMeshGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED ) == 0 ) || + ( ( pStripGroup->flags & OptimizedModel::STRIPGROUP_SUPPRESS_HW_MORPH ) != 0 ) ) + { + pMeshGroup->m_pMorph = NULL; + return; + } + + // Build an inverse mapping from mesh index to strip group index + unsigned short *pMeshIndexToGroupIndex = (unsigned short*)_alloca( pMesh->pModel()->numvertices * sizeof(unsigned short) ); + memset( pMeshIndexToGroupIndex, 0xFF, pMesh->pModel()->numvertices * sizeof(unsigned short) ); + for ( int i = 0; i < pStripGroup->numVerts; ++i ) + { + int 
nMeshVert = pStripGroup->pVertex(i)->origMeshVertID; + pMeshIndexToGroupIndex[ nMeshVert ] = (unsigned short)i; + } + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + MorphFormat_t morphType = MORPH_POSITION | MORPH_NORMAL | MORPH_SPEED | MORPH_SIDE; + for ( int i = 0; i < pMesh->numflexes; ++i ) + { + if ( pMesh->pFlex( i )->vertanimtype == STUDIO_VERT_ANIM_WRINKLE ) + { + morphType |= MORPH_WRINKLE; + break; + } + } + + char pTemp[256]; + Q_snprintf( pTemp, sizeof(pTemp), "%s [%p]", pStudioHdr->pszName(), pMeshGroup ); + pMeshGroup->m_pMorph = pRenderContext->CreateMorph( morphType, pTemp ); + + const float flVertAnimFixedPointScale = pStudioHdr->VertAnimFixedPointScale(); + + CMorphBuilder morphBuilder; + morphBuilder.Begin( pMeshGroup->m_pMorph, 1.0f / flVertAnimFixedPointScale ); + + for ( int i = 0; i < pMesh->numflexes; ++i ) + { + mstudioflex_t *pFlex = pMesh->pFlex( i ); + byte *pVAnim = pFlex->pBaseVertanim(); + int nVAnimSizeBytes = pFlex->VertAnimSizeBytes(); + for ( int j = 0; j < pFlex->numverts; ++j ) + { + mstudiovertanim_t *pAnim = (mstudiovertanim_t*)( pVAnim + j * nVAnimSizeBytes ); + int nMeshVert = pAnim->index; + unsigned short nGroupVert = pMeshIndexToGroupIndex[nMeshVert]; + + // In this case, this vertex is not part of this meshgroup. Ignore it. 
+ if ( nGroupVert == 0xFFFF ) + continue; + + morphBuilder.PositionDelta3( pAnim->GetDeltaFixed( flVertAnimFixedPointScale ) ); + morphBuilder.NormalDelta3( pAnim->GetNDeltaFixed( flVertAnimFixedPointScale ) ); + morphBuilder.Speed1f( pAnim->speed / 255.0f ); + morphBuilder.Side1f( pAnim->side / 255.0f ); + if ( pFlex->vertanimtype == STUDIO_VERT_ANIM_WRINKLE ) + { + mstudiovertanim_wrinkle_t *pWrinkleAnim = static_cast<mstudiovertanim_wrinkle_t*>( pAnim ); + morphBuilder.WrinkleDelta1f( pWrinkleAnim->GetWrinkleDeltaFixed( flVertAnimFixedPointScale ) ); + } + else + { + morphBuilder.WrinkleDelta1f( 0.0f ); + } + + morphBuilder.AdvanceMorph( nGroupVert, i ); + } + } + + morphBuilder.End(); +} + + +//----------------------------------------------------------------------------- +// Builds the strip data +//----------------------------------------------------------------------------- +void CStudioRenderContext::R_StudioBuildMeshStrips( studiomeshgroup_t* pMeshGroup, + OptimizedModel::StripGroupHeader_t *pStripGroup ) +{ + // FIXME: This is bogus + // Compute the amount of memory we need to store the strip data + int i; + int stripDataSize = 0; + for( i = 0; i < pStripGroup->numStrips; ++i ) + { + stripDataSize += sizeof(OptimizedModel::StripHeader_t); + stripDataSize += pStripGroup->pStrip(i)->numBoneStateChanges * + sizeof(OptimizedModel::BoneStateChangeHeader_t); + } + + pMeshGroup->m_pStripData = (OptimizedModel::StripHeader_t*)malloc(stripDataSize); + + // Copy over the strip info + int boneStateChangeOffset = pStripGroup->numStrips * sizeof(OptimizedModel::StripHeader_t); + for( i = 0; i < pStripGroup->numStrips; ++i ) + { + memcpy( &pMeshGroup->m_pStripData[i], pStripGroup->pStrip(i), + sizeof( OptimizedModel::StripHeader_t ) ); + + // Fixup the bone state change offset, since we have it right after the strip data + pMeshGroup->m_pStripData[i].boneStateChangeOffset = boneStateChangeOffset - + i * sizeof(OptimizedModel::StripHeader_t); + + // copy over bone state 
changes + int boneWeightSize = pMeshGroup->m_pStripData[i].numBoneStateChanges * + sizeof(OptimizedModel::BoneStateChangeHeader_t); + + if (boneWeightSize != 0) + { + unsigned char* pBoneStateChange = (unsigned char*)pMeshGroup->m_pStripData + boneStateChangeOffset; + memcpy( pBoneStateChange, pStripGroup->pStrip(i)->pBoneStateChange(0), boneWeightSize); + + boneStateChangeOffset += boneWeightSize; + } + } + pMeshGroup->m_NumStrips = pStripGroup->numStrips; +} + + +//----------------------------------------------------------------------------- +// Determine the max. number of bone weights used by a stripgroup +//----------------------------------------------------------------------------- +int CStudioRenderContext::GetNumBoneWeights( const OptimizedModel::StripGroupHeader_t *pGroup ) +{ + int nBoneWeightsMax = 0; + + for (int i = 0;i < pGroup->numStrips; i++) + { + OptimizedModel::StripHeader_t * pStrip = pGroup->pStrip( i ); + nBoneWeightsMax = max( nBoneWeightsMax, (int)pStrip->numBones ); + } + + return nBoneWeightsMax; +} + +//----------------------------------------------------------------------------- +// Determine an actual model vertex format for a mesh based on its material usage. +// Bypasses the homegenous model vertex format in favor of the actual format. +// Ideally matches 1:1 the shader's data requirements without any bloat. 
+//----------------------------------------------------------------------------- +VertexFormat_t CStudioRenderContext::CalculateVertexFormat( const studiohdr_t *pStudioHdr, const studioloddata_t *pStudioLodData, + const mstudiomesh_t* pMesh, OptimizedModel::StripGroupHeader_t *pGroup, bool bIsHwSkinned ) +{ + bool bSkinnedMesh = ( pStudioHdr->numbones > 1 ); + int nBoneWeights = GetNumBoneWeights( pGroup ); + + bool bIsDX7 = !g_pMaterialSystemHardwareConfig->SupportsVertexAndPixelShaders(); + bool bIsDX8 = ( g_pMaterialSystemHardwareConfig->GetDXSupportLevel() < 90 ); + if ( bIsDX7 ) + { + // FIXME: this is untested (as of June '07, the engine currently doesn't work with "-dxlevel 70") + if ( bSkinnedMesh ) + return MATERIAL_VERTEX_FORMAT_MODEL_SKINNED_DX7; + else + return MATERIAL_VERTEX_FORMAT_MODEL_DX7; + } + else if ( bIsDX8 ) + { + if ( bSkinnedMesh ) + return MATERIAL_VERTEX_FORMAT_MODEL_SKINNED; + else + return MATERIAL_VERTEX_FORMAT_MODEL; + } + else + { + // DX9+ path (supports vertex compression) + + // iterate each skin table + // determine aggregate vertex format for specified mesh's material + VertexFormat_t newVertexFormat = 0; + //bool bBumpmapping = false; + short *pSkinref = pStudioHdr->pSkinref( 0 ); + for ( int i = 0; i < pStudioHdr->numskinfamilies; i++ ) + { + // FIXME: ### MATERIAL VERTEX FORMATS ARE UNRELIABLE! ### + // + // IMaterial* pMaterial = pStudioLodData->ppMaterials[ pSkinref[ pMesh->material ] ]; + // Assert( pMaterial ); + // VertexFormat_t vertexFormat = pMaterial->GetVertexFormat(); + // newVertexFormat &= ~VERTEX_FORMAT_COMPRESSED; // Decide whether to compress below + // + // FIXME: ### MATERIAL VERTEX FORMATS ARE UNRELIABLE! ### + // we need to go through all the shader CPP code and make sure that the correct vertex format + // is being specified for every single shader combo! We don't have time to fix that before + // shipping Ep2, but should fix it ASAP afterwards. 
To make catching such errors easier, we + // should Assert in draw calls that the vertexdecl matches vertex shader inputs (note that D3D + // debug DLLs will do that on PC, though it's not as informative as if we do it ourselves). + // So, in the absence of reliable material vertex formats, use the old 'standard' elements + // (we can still omit skinning data - and COLOR for DX8+, where it should come from the + // second static lighting stream): + VertexFormat_t vertexFormat = bIsDX7 ? MATERIAL_VERTEX_FORMAT_MODEL_DX7 : ( MATERIAL_VERTEX_FORMAT_MODEL & ~VERTEX_COLOR ); + + // aggregate single bit settings + newVertexFormat |= vertexFormat & ( ( 1 << VERTEX_LAST_BIT ) - 1 ); + + int nUserDataSize = UserDataSize( vertexFormat ); + if ( nUserDataSize > UserDataSize( newVertexFormat ) ) + { + newVertexFormat &= ~USER_DATA_SIZE_MASK; + newVertexFormat |= VERTEX_USERDATA_SIZE( nUserDataSize ); + } + + for (int j = 0; j < VERTEX_MAX_TEXTURE_COORDINATES; ++j) + { + int nSize = TexCoordSize( j, vertexFormat ); + if ( nSize > TexCoordSize( j, newVertexFormat ) ) + { + newVertexFormat &= ~VERTEX_TEXCOORD_SIZE( j, 0x7 ); + newVertexFormat |= VERTEX_TEXCOORD_SIZE( j, nSize ); + } + } + + // FIXME: re-enable this test, fix it to work and see how much memory we save (Q: why is this different to CStudioRenderContext::MeshNeedsTangentSpace ?) 
+ /*if ( !bBumpmapping && pMaterial->NeedsTangentSpace() ) + { + bool bFound = false; + IMaterialVar *pEnvmapMatVar = pMaterial->FindVar( "$envmap", &bFound, false ); + if ( bFound && pEnvmapMatVar->IsDefined() ) + { + IMaterialVar *pBumpMatVar = pMaterial->FindVar( "$bumpmap", &bFound, false ); + if ( bFound && pBumpMatVar->IsDefined() ) + { + bBumpmapping = true; + } + } + } */ + + pSkinref += pStudioHdr->numskinref; + } + + // Add skinning elements for non-rigid models (with more than one bone weight) + if ( bSkinnedMesh ) + { + if ( nBoneWeights > 0 ) + { + // Always exactly zero or two weights + newVertexFormat |= VERTEX_BONEWEIGHT( 2 ); + } + newVertexFormat |= VERTEX_BONE_INDEX; + } + + + // FIXME: re-enable this (see above) + /*if ( !bBumpmapping ) + { + // no bumpmapping, user data not needed + newVertexFormat &= ~USER_DATA_SIZE_MASK; + }*/ + + // materials on models should never have tangent space as they use userdata + Assert( !(newVertexFormat & VERTEX_TANGENT_SPACE) ); + + // Don't compress the mesh unless it is HW-skinned (we only want to compress static + // VBs, not dynamic ones - that would slow down the MeshBuilder in dynamic use cases). + // Also inspect the vertex data to see if it's appropriate for the vertex element + // compression techniques that we do (e.g. look at UV ranges). 
+ if ( //IsX360() && // Disabled until the craziness is banished + bIsHwSkinned && + ( g_pMaterialSystemHardwareConfig->SupportsCompressedVertices() == VERTEX_COMPRESSION_ON ) ) + { + // this mesh is appropriate for vertex compression + newVertexFormat |= VERTEX_FORMAT_COMPRESSED; + } + + return newVertexFormat; + } +} + +bool CStudioRenderContext::MeshNeedsTangentSpace( studiohdr_t *pStudioHdr, studioloddata_t *pStudioLodData, mstudiomesh_t* pMesh ) +{ + // iterate each skin table + if( !pStudioHdr || !pStudioHdr->pSkinref( 0 ) || !pStudioHdr->numskinfamilies ) + { + return false; + } + short *pSkinref = pStudioHdr->pSkinref( 0 ); + for ( int i=0; i<pStudioHdr->numskinfamilies; i++) + { + IMaterial* pMaterial = pStudioLodData->ppMaterials[pSkinref[pMesh->material]]; + Assert( pMaterial ); + if( !pMaterial ) + { + continue; + } + + // Warning( "*****%s needstangentspace: %d\n", pMaterial->GetName(), pMaterial->NeedsTangentSpace() ? 1 : 0 ); + if( pMaterial->NeedsTangentSpace() ) + { + return true; + } + } + return false; +} + +//----------------------------------------------------------------------------- +// Creates a single mesh +//----------------------------------------------------------------------------- +void CStudioRenderContext::R_StudioCreateSingleMesh( studiohdr_t *pStudioHdr, studioloddata_t *pStudioLodData, + mstudiomesh_t* pMesh, OptimizedModel::MeshHeader_t* pVtxMesh, int numBones, + studiomeshdata_t* pMeshData, int *pColorMeshID ) +{ + // Here are the cases where we don't use any meshes at all... 
+ // In the case of eyes, we're just gonna use dynamic buffers + // because it's the fastest solution (prevents lots of locks) + + bool bNeedsTangentSpace = MeshNeedsTangentSpace( pStudioHdr, pStudioLodData, pMesh ); + + // Each strip group represents a locking group, it's a set of vertices + // that are locked together, and, potentially, software light + skinned together + pMeshData->m_NumGroup = pVtxMesh->numStripGroups; + pMeshData->m_pMeshGroup = new studiomeshgroup_t[pVtxMesh->numStripGroups]; + + for (int i = 0; i < pVtxMesh->numStripGroups; ++i ) + { + OptimizedModel::StripGroupHeader_t* pStripGroup = pVtxMesh->pStripGroup(i); + studiomeshgroup_t* pMeshGroup = &pMeshData->m_pMeshGroup[i]; + + pMeshGroup->m_MeshNeedsRestore = false; + + // Set the flags... + pMeshGroup->m_Flags = 0; + if (pStripGroup->flags & OptimizedModel::STRIPGROUP_IS_FLEXED) + { + pMeshGroup->m_Flags |= MESHGROUP_IS_FLEXED; + } + + if (pStripGroup->flags & OptimizedModel::STRIPGROUP_IS_DELTA_FLEXED) + { + pMeshGroup->m_Flags |= MESHGROUP_IS_DELTA_FLEXED; + } + + bool bIsHwSkinned = !!(pStripGroup->flags & OptimizedModel::STRIPGROUP_IS_HWSKINNED); + if ( bIsHwSkinned ) + { + pMeshGroup->m_Flags |= MESHGROUP_IS_HWSKINNED; + } + + // get the minimal vertex format for this mesh + VertexFormat_t vertexFormat = CalculateVertexFormat( pStudioHdr, pStudioLodData, pMesh, pStripGroup, bIsHwSkinned ); + + // Build the vertex + index buffers + R_StudioBuildMeshGroup( pStudioHdr->pszName(), bNeedsTangentSpace, pMeshGroup, pStripGroup, pMesh, pStudioHdr, vertexFormat ); + + // Copy over the tristrip and triangle list data + R_StudioBuildMeshStrips( pMeshGroup, pStripGroup ); + + // Builds morph targets + R_StudioBuildMorph( pStudioHdr, pMeshGroup, pMesh, pStripGroup ); + + // Build the mapping from strip group vertex idx to actual mesh idx + pMeshGroup->m_pGroupIndexToMeshIndex = new unsigned short[pStripGroup->numVerts + PREFETCH_VERT_COUNT]; + pMeshGroup->m_NumVertices = pStripGroup->numVerts; + + 
int j; + for ( j = 0; j < pStripGroup->numVerts; ++j ) + { + pMeshGroup->m_pGroupIndexToMeshIndex[j] = pStripGroup->pVertex(j)->origMeshVertID; + } + + // Extra copies are for precaching... + for ( j = pStripGroup->numVerts; j < pStripGroup->numVerts + PREFETCH_VERT_COUNT; ++j ) + { + pMeshGroup->m_pGroupIndexToMeshIndex[j] = pMeshGroup->m_pGroupIndexToMeshIndex[pStripGroup->numVerts - 1]; + } + + // assign the possibly used color mesh id now + pMeshGroup->m_ColorMeshID = (*pColorMeshID)++; + } +} + + +//----------------------------------------------------------------------------- +// Creates static meshes +//----------------------------------------------------------------------------- +void CStudioRenderContext::R_StudioCreateStaticMeshes( studiohdr_t *pStudioHdr, + OptimizedModel::FileHeader_t *pVtxHdr, studiohwdata_t *pStudioHWData, int nLodID, int *pColorMeshID ) +{ + int i, j, k; + + Assert( pStudioHdr && pVtxHdr && pStudioHWData ); + + pStudioHWData->m_pLODs[nLodID].m_pMeshData = new studiomeshdata_t[pStudioHWData->m_NumStudioMeshes]; + + // Iterate over every body part... + for ( i = 0; i < pStudioHdr->numbodyparts; i++ ) + { + mstudiobodyparts_t* pBodyPart = pStudioHdr->pBodypart(i); + OptimizedModel::BodyPartHeader_t* pVtxBodyPart = pVtxHdr->pBodyPart(i); + + // Iterate over every submodel... 
+ for ( j = 0; j < pBodyPart->nummodels; ++j ) + { + mstudiomodel_t* pModel = pBodyPart->pModel(j); + OptimizedModel::ModelHeader_t* pVtxModel = pVtxBodyPart->pModel(j); + OptimizedModel::ModelLODHeader_t *pVtxLOD = pVtxModel->pLOD( nLodID ); + + // Determine which meshes should be hw morphed + DetermineHWMorphing( pModel, pVtxLOD ); + + // Support tracking of VB allocations + // FIXME: categorise studiomodel allocs more precisely + if ( g_VBAllocTracker ) + { + if ( ( pStudioHdr->numbones > 8 ) || ( pStudioHdr->numflexdesc > 0 ) ) + { + g_VBAllocTracker->TrackMeshAllocations( "R_StudioCreateStaticMeshes (character)" ); + } + else + { + if ( pStudioHdr->flags & STUDIOHDR_FLAGS_STATIC_PROP ) + { + g_VBAllocTracker->TrackMeshAllocations( "R_StudioCreateStaticMeshes (prop_static)" ); + } + else + { + g_VBAllocTracker->TrackMeshAllocations( "R_StudioCreateStaticMeshes (prop_dynamic)" ); + } + } + } + + // Iterate over all the meshes.... + for ( k = 0; k < pModel->nummeshes; ++k ) + { + Assert( pModel->nummeshes == pVtxLOD->numMeshes ); + mstudiomesh_t* pMesh = pModel->pMesh(k); + OptimizedModel::MeshHeader_t* pVtxMesh = pVtxLOD->pMesh(k); + + Assert( pMesh->meshid < pStudioHWData->m_NumStudioMeshes ); + R_StudioCreateSingleMesh( pStudioHdr, &pStudioHWData->m_pLODs[nLodID], + pMesh, pVtxMesh, pVtxHdr->maxBonesPerVert, + &pStudioHWData->m_pLODs[nLodID].m_pMeshData[pMesh->meshid], pColorMeshID ); + } + + if ( g_VBAllocTracker ) + { + g_VBAllocTracker->TrackMeshAllocations( NULL ); + } + } + } +} + + +//----------------------------------------------------------------------------- +// Destroys static meshes +//----------------------------------------------------------------------------- +void CStudioRenderContext::R_StudioDestroyStaticMeshes( int numStudioMeshes, studiomeshdata_t **ppStudioMeshes ) +{ + if( !*ppStudioMeshes) + return; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + // Iterate over every body mesh... 
+ for ( int i = 0; i < numStudioMeshes; ++i ) + { + studiomeshdata_t* pMesh = &((*ppStudioMeshes)[i]); + + for (int j = 0; j < pMesh->m_NumGroup; ++j) + { + studiomeshgroup_t* pGroup = &pMesh->m_pMeshGroup[j]; + if (pGroup->m_pGroupIndexToMeshIndex) + { + delete[] pGroup->m_pGroupIndexToMeshIndex; + pGroup->m_pGroupIndexToMeshIndex = 0; + } + + if (pGroup->m_pUniqueTris) + { + delete [] pGroup->m_pUniqueTris; + pGroup->m_pUniqueTris = 0; + } + + if (pGroup->m_pIndices) + { + delete [] pGroup->m_pIndices; + pGroup->m_pIndices = 0; + } + + if (pGroup->m_pMesh) + { + pRenderContext->DestroyStaticMesh( pGroup->m_pMesh ); + pGroup->m_pMesh = 0; + } + + if (pGroup->m_pMorph) + { + pRenderContext->DestroyMorph( pGroup->m_pMorph ); + pGroup->m_pMorph = 0; + } + + if (pGroup->m_pStripData) + { + free( pGroup->m_pStripData ); + pGroup->m_pStripData = 0; + } + } + + if (pMesh->m_pMeshGroup) + { + delete[] pMesh->m_pMeshGroup; + pMesh->m_pMeshGroup = 0; + } + } + + if ( *ppStudioMeshes ) + { + delete *ppStudioMeshes; + *ppStudioMeshes = 0; + } +} + + +//----------------------------------------------------------------------------- +// Builds the decal bone remap for a particular mesh +//----------------------------------------------------------------------------- +void CStudioRenderContext::BuildDecalBoneMap( studiohdr_t *pStudioHdr, int *pUsedBones, int *pBoneRemap, int *pMaxBoneCount, mstudiomesh_t* pMesh, OptimizedModel::StripGroupHeader_t* pStripGroup ) +{ + const mstudio_meshvertexdata_t *pVertData = GetFatVertexData( pMesh, pStudioHdr ); + Assert( pVertData ); + for ( int i = 0; i < pStripGroup->numVerts; ++i ) + { + int nMeshVert = pStripGroup->pVertex( i )->origMeshVertID; + mstudioboneweight_t &boneWeight = pVertData->Vertex( nMeshVert )->m_BoneWeights; + int nBoneCount = boneWeight.numbones; + for ( int j = 0; j < nBoneCount; ++j ) + { + if ( boneWeight.weight[j] == 0.0f ) + continue; + + if ( pBoneRemap[ (unsigned)boneWeight.bone[j] ] >= 0 ) + continue; + + 
pBoneRemap[ (unsigned)boneWeight.bone[j] ] = *pUsedBones; + *pUsedBones = *pUsedBones + 1; + } + } + + for ( int i = 0; i < pStripGroup->numStrips; ++i ) + { + if ( pStripGroup->pStrip(i)->numBones > *pMaxBoneCount ) + { + *pMaxBoneCount = pStripGroup->pStrip(i)->numBones; + } + } +} + + +//----------------------------------------------------------------------------- +// For decals on hardware morphing, we must actually do hardware skinning +// because the flex must occur before skinning. +// For this to work, we have to hope that the total # of bones used by +// hw flexed verts is < than the max possible for the dx level we're running under +//----------------------------------------------------------------------------- +void CStudioRenderContext::ComputeHWMorphDecalBoneRemap( studiohdr_t *pStudioHdr, OptimizedModel::FileHeader_t *pVtxHdr, studiohwdata_t *pStudioHWData, int nLOD ) +{ + if ( pStudioHdr->numbones == 0 ) + return; + + // Remaps sw bones to hw bones during decal rendering + // NOTE: Only bones affecting vertices which have hw flexes will be add to this map. + int nBufSize = pStudioHdr->numbones * sizeof(int); + int *pBoneRemap = (int*)_alloca( nBufSize ); + memset( pBoneRemap, 0xFF, nBufSize ); + int nMaxBoneCount = 0; + + // NOTE: HW bone index 0 is always the identity transform during decals. + pBoneRemap[0] = 0; // necessary for unused bones in a vertex + int nUsedBones = 1; + + studioloddata_t *pStudioLOD = &pStudioHWData->m_pLODs[nLOD]; + for ( int i = 0; i < pStudioHdr->numbodyparts; ++i ) + { + mstudiobodyparts_t* pBodyPart = pStudioHdr->pBodypart(i); + OptimizedModel::BodyPartHeader_t* pVtxBodyPart = pVtxHdr->pBodyPart(i); + + // Iterate over every submodel... + for ( int j = 0; j < pBodyPart->nummodels; ++j ) + { + mstudiomodel_t* pModel = pBodyPart->pModel(j); + OptimizedModel::ModelHeader_t* pVtxModel = pVtxBodyPart->pModel(j); + OptimizedModel::ModelLODHeader_t *pVtxLOD = pVtxModel->pLOD( nLOD ); + + // Iterate over all the meshes.... 
+ for ( int k = 0; k < pModel->nummeshes; ++k ) + { + Assert( pModel->nummeshes == pVtxLOD->numMeshes ); + mstudiomesh_t* pMesh = pModel->pMesh(k); + OptimizedModel::MeshHeader_t* pVtxMesh = pVtxLOD->pMesh(k); + + studiomeshdata_t* pMeshData = &pStudioLOD->m_pMeshData[pMesh->meshid]; + for ( int l = 0; l < pVtxMesh->numStripGroups; ++l ) + { + studiomeshgroup_t* pMeshGroup = &pMeshData->m_pMeshGroup[l]; + if ( !pMeshGroup->m_pMorph ) + continue; + + OptimizedModel::StripGroupHeader_t* pStripGroup = pVtxMesh->pStripGroup(l); + BuildDecalBoneMap( pStudioHdr, &nUsedBones, pBoneRemap, &nMaxBoneCount, pMesh, pStripGroup ); + } + } + } + } + + if ( nUsedBones > 1 ) + { + if ( nUsedBones > g_pMaterialSystemHardwareConfig->MaxVertexShaderBlendMatrices() ) + { + Warning( "Hardware morphing of decals will be busted! Too many unique bones on flexed vertices!\n" ); + } + + pStudioLOD->m_pHWMorphDecalBoneRemap = new int[ pStudioHdr->numbones ]; + memcpy( pStudioLOD->m_pHWMorphDecalBoneRemap, pBoneRemap, nBufSize ); + pStudioLOD->m_nDecalBoneCount = nMaxBoneCount; + } +} + + +//----------------------------------------------------------------------------- +// Hook needed by mdlcache to load the vertex data +//----------------------------------------------------------------------------- +const vertexFileHeader_t * mstudiomodel_t::CacheVertexData( void *pModelData ) +{ + // make requested data resident + return g_pStudioDataCache->CacheVertexData( (studiohdr_t *)pModelData ); +} + + +//----------------------------------------------------------------------------- +// Loads, unloads models +//----------------------------------------------------------------------------- +bool CStudioRenderContext::LoadModel( studiohdr_t *pStudioHdr, void *pVtxBuffer, studiohwdata_t *pStudioHWData ) +{ + int i; + int j; + + Assert( pStudioHdr ); + Assert( pVtxBuffer ); + Assert( pStudioHWData ); + + if ( !pStudioHdr || !pVtxBuffer || !pStudioHWData ) + return false; + + // NOTE: This must be called 
*after* Mod_LoadStudioModel + OptimizedModel::FileHeader_t* pVertexHdr = (OptimizedModel::FileHeader_t*)pVtxBuffer; + + if ( pVertexHdr->checkSum != pStudioHdr->checksum ) + { + ConDMsg("Error! Model %s .vtx file out of synch with .mdl\n", pStudioHdr->pszName() ); + return false; + } + + pStudioHWData->m_NumStudioMeshes = 0; + for ( i = 0; i < pStudioHdr->numbodyparts; i++ ) + { + mstudiobodyparts_t* pBodyPart = pStudioHdr->pBodypart(i); + for (j = 0; j < pBodyPart->nummodels; j++) + { + pStudioHWData->m_NumStudioMeshes += pBodyPart->pModel(j)->nummeshes; + } + } + + // Create static meshes + Assert( pVertexHdr->numLODs ); + pStudioHWData->m_RootLOD = min( (int)pStudioHdr->rootLOD, pVertexHdr->numLODs-1 ); + pStudioHWData->m_NumLODs = pVertexHdr->numLODs; + pStudioHWData->m_pLODs = new studioloddata_t[pVertexHdr->numLODs]; + memset( pStudioHWData->m_pLODs, 0, pVertexHdr->numLODs * sizeof( studioloddata_t )); + + // reset the runtime flags + pStudioHdr->flags &= ~STUDIOHDR_FLAGS_USES_ENV_CUBEMAP; + pStudioHdr->flags &= ~STUDIOHDR_FLAGS_USES_FB_TEXTURE; + pStudioHdr->flags &= ~STUDIOHDR_FLAGS_USES_BUMPMAPPING; + +#ifdef _DEBUG + int totalNumMeshGroups = 0; +#endif + int nColorMeshID = 0; + int nLodID; + for ( nLodID = pStudioHWData->m_RootLOD; nLodID < pStudioHWData->m_NumLODs; nLodID++ ) + { + // Load materials and determine material dependent mesh requirements + LoadMaterials( pStudioHdr, pVertexHdr, pStudioHWData->m_pLODs[nLodID], nLodID ); + + // build the meshes + R_StudioCreateStaticMeshes( pStudioHdr, pVertexHdr, pStudioHWData, nLodID, &nColorMeshID ); + + // Build the hardware bone remap for decal rendering using HW morphing + ComputeHWMorphDecalBoneRemap( pStudioHdr, pVertexHdr, pStudioHWData, nLodID ); + + // garymcthack - need to check for NULL here. 
+ // save off the lod switch point + pStudioHWData->m_pLODs[nLodID].m_SwitchPoint = pVertexHdr->pBodyPart( 0 )->pModel( 0 )->pLOD( nLodID )->switchPoint; + +#ifdef _DEBUG + studioloddata_t *pLOD = &pStudioHWData->m_pLODs[nLodID]; + for ( int meshID = 0; meshID < pStudioHWData->m_NumStudioMeshes; ++meshID ) + { + totalNumMeshGroups += pLOD->m_pMeshData[meshID].m_NumGroup; + } +#endif + } + +#ifdef _DEBUG + Assert( nColorMeshID == totalNumMeshGroups ); +#endif + + return true; +} + + +void CStudioRenderContext::UnloadModel( studiohwdata_t *pHardwareData ) +{ + int i; + for ( i = pHardwareData->m_RootLOD; i < pHardwareData->m_NumLODs; i++ ) + { + int j; + for ( j = 0; j < pHardwareData->m_pLODs[i].numMaterials; j++ ) + { + if ( pHardwareData->m_pLODs[i].ppMaterials[j] ) + { + pHardwareData->m_pLODs[i].ppMaterials[j]->DecrementReferenceCount(); + } + } + delete [] pHardwareData->m_pLODs[i].ppMaterials; + delete [] pHardwareData->m_pLODs[i].pMaterialFlags; + pHardwareData->m_pLODs[i].ppMaterials = NULL; + pHardwareData->m_pLODs[i].pMaterialFlags = NULL; + } + for ( i = pHardwareData->m_RootLOD; i < pHardwareData->m_NumLODs; i++ ) + { + R_StudioDestroyStaticMeshes( pHardwareData->m_NumStudioMeshes, &pHardwareData->m_pLODs[i].m_pMeshData ); + } + delete[] pHardwareData->m_pLODs; + pHardwareData->m_pLODs = NULL; +} + + +//----------------------------------------------------------------------------- +// Refresh the studiohdr since it was lost... 
+//----------------------------------------------------------------------------- +void CStudioRenderContext::RefreshStudioHdr( studiohdr_t* pStudioHdr, studiohwdata_t* pHardwareData ) +{ +} + +//----------------------------------------------------------------------------- +// Set the eye view target +//----------------------------------------------------------------------------- +void CStudioRenderContext::SetEyeViewTarget( const studiohdr_t *pStudioHdr, int nBodyIndex, const Vector& viewtarget ) +{ + VectorCopy( viewtarget, m_RC.m_ViewTarget ); +} + + +//----------------------------------------------------------------------------- +// Returns information about the ambient light samples +//----------------------------------------------------------------------------- +static TableVector s_pAmbientLightDir[6] = +{ + { 1, 0, 0 }, + { -1, 0, 0 }, + { 0, 1, 0 }, + { 0, -1, 0 }, + { 0, 0, 1 }, + { 0, 0, -1 }, +}; + +int CStudioRenderContext::GetNumAmbientLightSamples() +{ + return 6; +} + +const Vector *CStudioRenderContext::GetAmbientLightDirections() +{ + return (const Vector*)s_pAmbientLightDir; +} + + +//----------------------------------------------------------------------------- +// Methods related to LOD +//----------------------------------------------------------------------------- +int CStudioRenderContext::GetNumLODs( const studiohwdata_t &hardwareData ) const +{ + return hardwareData.m_NumLODs; +} + +float CStudioRenderContext::GetLODSwitchValue( const studiohwdata_t &hardwareData, int nLOD ) const +{ + return hardwareData.m_pLODs[nLOD].m_SwitchPoint; +} + +void CStudioRenderContext::SetLODSwitchValue( studiohwdata_t &hardwareData, int nLOD, float flSwitchValue ) +{ + // NOTE: This must block the hardware thread since it reads this data. + // This method is only used in tools, though. 
+ MaterialLock_t hLock = g_pMaterialSystem->Lock(); + hardwareData.m_pLODs[nLOD].m_SwitchPoint = flSwitchValue; + g_pMaterialSystem->Unlock( hLock ); +} + + +//----------------------------------------------------------------------------- +// Returns the first n materials. The studiohdr material list is the superset +// for all lods. +//----------------------------------------------------------------------------- +int CStudioRenderContext::GetMaterialList( studiohdr_t *pStudioHdr, int count, IMaterial** ppMaterials ) +{ + AssertMsg( pStudioHdr, "Don't ignore this assert! CStudioRenderContext::GetMaterialList() has null pStudioHdr." ); + + if ( !pStudioHdr ) + return 0; + + if ( pStudioHdr->textureindex == 0 ) + return 0; + + // iterate each texture + int i; + int j; + int found = 0; + for ( i = 0; i < pStudioHdr->numtextures; i++ ) + { + char szPath[MAX_PATH]; + IMaterial *pMaterial = NULL; + + // iterate quietly through all specified directories until a valid material is found + for ( j = 0; j < pStudioHdr->numcdtextures && IsErrorMaterial( pMaterial ); j++ ) + { + // If we don't do this, we get filenames like "materials\\blah.vmt". + const char *textureName = pStudioHdr->pTexture( i )->pszName(); + if ( textureName[0] == CORRECT_PATH_SEPARATOR || textureName[0] == INCORRECT_PATH_SEPARATOR ) + ++textureName; + + // This prevents filenames like /models/blah.vmt. 
+ const char *pCdTexture = pStudioHdr->pCdtexture( j ); + if ( pCdTexture[0] == CORRECT_PATH_SEPARATOR || pCdTexture[0] == INCORRECT_PATH_SEPARATOR ) + ++pCdTexture; + + V_ComposeFileName( pCdTexture, textureName, szPath, sizeof( szPath ) ); + + if ( pStudioHdr->flags & STUDIOHDR_FLAGS_OBSOLETE ) + { + pMaterial = g_pMaterialSystem->FindMaterialEx( "models/obsolete/obsolete", TEXTURE_GROUP_MODEL, MATERIAL_FINDCONTEXT_ISONAMODEL, false ); + } + else + { + pMaterial = g_pMaterialSystem->FindMaterialEx( szPath, TEXTURE_GROUP_MODEL, MATERIAL_FINDCONTEXT_ISONAMODEL, false ); + } + } + + if ( !pMaterial ) + continue; + + if ( found < count ) + { + int k; + for ( k=0; k<found; k++ ) + { + if ( ppMaterials[k] == pMaterial ) + break; + } + if ( k >= found ) + { + // add uniquely + ppMaterials[found++] = pMaterial; + } + } + else + { + break; + } + } + + return found; +} + + +int CStudioRenderContext::GetMaterialListFromBodyAndSkin( MDLHandle_t studio, int nSkin, int nBody, int nCountOutputMaterials, IMaterial** ppOutputMaterials ) +{ + int found = 0; + + studiohwdata_t *pStudioHWData = g_pMDLCache->GetHardwareData( studio ); + if ( pStudioHWData == NULL ) + return 0; + + for ( int lodID = pStudioHWData->m_RootLOD; lodID < pStudioHWData->m_NumLODs; lodID++ ) + { + studiohdr_t *pStudioHdr = g_pMDLCache->GetStudioHdr( studio ); + IMaterial **ppInputMaterials = pStudioHWData->m_pLODs[lodID].ppMaterials; + + if ( nSkin >= pStudioHdr->numskinfamilies ) + { + nSkin = 0; + } + + short *pSkinRef = pStudioHdr->pSkinref( nSkin * pStudioHdr->numskinref ); + + for (int i=0 ; i < pStudioHdr->numbodyparts ; i++) + { + mstudiomodel_t *pModel = NULL; + R_StudioSetupModel( i, nBody, &pModel, pStudioHdr ); + + // Iterate over all the meshes.... 
each mesh is a new material + for( int k = 0; k < pModel->nummeshes; ++k ) + { + mstudiomesh_t *pMesh = pModel->pMesh(k); + IMaterial *pMaterial = ppInputMaterials[pSkinRef[pMesh->material]]; + Assert( pMaterial ); + + int m; + for ( m=0; m<found; m++ ) + { + if ( ppOutputMaterials[m] == pMaterial ) + break; + } + if ( m >= found ) + { + // add uniquely + ppOutputMaterials[found++] = pMaterial; + + // No more room to store additional materials! + if ( found >= nCountOutputMaterials ) + return found; + } + } + } + } + + return found; +} + + +//----------------------------------------------------------------------------- +// Returns perf stats about a particular model +//----------------------------------------------------------------------------- +void CStudioRenderContext::GetPerfStats( DrawModelResults_t *pResults, const DrawModelInfo_t &info, CUtlBuffer *pSpewBuf ) const +{ + pResults->m_ActualTriCount = pResults->m_TextureMemoryBytes = 0; + pResults->m_Materials.RemoveAll(); + + Assert( info.m_Lod >= 0 ); + if ( info.m_Lod < 0 || !info.m_pHardwareData->m_pLODs ) + return; + + studiomeshdata_t *pStudioMeshes = info.m_pHardwareData->m_pLODs[info.m_Lod].m_pMeshData; + + // Set up an array that keeps up with the number of used hardware bones in the models. + CUtlVector<bool> hardwareBonesUsed; + hardwareBonesUsed.EnsureCount( info.m_pStudioHdr->numbones ); + int i; + for( i = 0; i < info.m_pStudioHdr->numbones; i++ ) + { + hardwareBonesUsed[i] = false; + } + + // Warning( "\n\n\n" ); + pResults->m_NumMaterials = 0; + int numBoneStateChangeBatches = 0; + int numBoneStateChanges = 0; + // Iterate over every submodel... 
+ IMaterial **ppMaterials = info.m_pHardwareData->m_pLODs[info.m_Lod].ppMaterials; + + int nSkin = info.m_Skin; + if ( nSkin >= info.m_pStudioHdr->numskinfamilies ) + { + nSkin = 0; + } + short *pSkinRef = info.m_pStudioHdr->pSkinref( nSkin * info.m_pStudioHdr->numskinref ); + + pResults->m_NumBatches = 0; + + for (i=0 ; i < info.m_pStudioHdr->numbodyparts ; i++) + { + mstudiomodel_t *pModel = NULL; + R_StudioSetupModel( i, info.m_Body, &pModel, info.m_pStudioHdr ); + + // Iterate over all the meshes.... each mesh is a new material + int k; + for( k = 0; k < pModel->nummeshes; ++k ) + { + mstudiomesh_t *pMesh = pModel->pMesh(k); + IMaterial *pMaterial = ppMaterials[pSkinRef[pMesh->material]]; + Assert( pMaterial ); + studiomeshdata_t *pMeshData = &pStudioMeshes[pMesh->meshid]; + if( pMeshData->m_NumGroup == 0 ) + continue; + + Assert( pResults->m_NumMaterials == pResults->m_Materials.Count() ); + pResults->m_NumMaterials++; + if( pResults->m_NumMaterials < MAX_DRAW_MODEL_INFO_MATERIALS ) + { + pResults->m_Materials.AddToTail( pMaterial ); + } + else + { + Assert( 0 ); + } + if( pSpewBuf ) + { + pSpewBuf->Printf( " material: %s\n", pMaterial->GetName() ); + } + int numPasses = m_RC.m_pForcedMaterial ? 
m_RC.m_pForcedMaterial->GetNumPasses() : pMaterial->GetNumPasses(); + if( pSpewBuf ) + { + pSpewBuf->Printf( " numPasses:%d\n", numPasses ); + } + int bytes = pMaterial->GetTextureMemoryBytes(); + pResults->m_TextureMemoryBytes += bytes; + if( pSpewBuf ) + { + pSpewBuf->Printf( " texture memory: %d (Only valid in a rendering app)\n", bytes ); + } + + // Iterate over all stripgroups + int stripGroupID; + for( stripGroupID = 0; stripGroupID < pMeshData->m_NumGroup; stripGroupID++ ) + { + studiomeshgroup_t *pMeshGroup = &pMeshData->m_pMeshGroup[stripGroupID]; + bool bIsFlexed = ( pMeshGroup->m_Flags & MESHGROUP_IS_FLEXED ) != 0; + bool bIsHWSkinned = ( pMeshGroup->m_Flags & MESHGROUP_IS_HWSKINNED ) != 0; + + if( pSpewBuf ) + { + pSpewBuf->Printf( " %d batch(es):\n", ( int )pMeshGroup->m_NumStrips ); + } + // Iterate over all strips. . . each strip potentially changes bones states. + int stripID; + for( stripID = 0; stripID < pMeshGroup->m_NumStrips; stripID++ ) + { + pResults->m_NumBatches++; + + OptimizedModel::StripHeader_t *pStripData = &pMeshGroup->m_pStripData[stripID]; + numBoneStateChangeBatches++; + numBoneStateChanges += pStripData->numBoneStateChanges; + + if( bIsHWSkinned ) + { + // Only count bones as hardware bones if we are using hardware skinning here. + int boneID; + for( boneID = 0; boneID < pStripData->numBoneStateChanges; boneID++ ) + { + OptimizedModel::BoneStateChangeHeader_t *pBoneStateChange = pStripData->pBoneStateChange( boneID ); + hardwareBonesUsed[pBoneStateChange->newBoneID] = true; + } + } + + if( pStripData->flags & OptimizedModel::STRIP_IS_TRILIST ) + { + // TODO: need to factor in bIsFlexed and bIsHWSkinned + int numTris = pStripData->numIndices / 3; + if( pSpewBuf ) + { + pSpewBuf->Printf( " %s%s", bIsFlexed ? "flexed " : "nonflexed ", + bIsHWSkinned ? 
"hwskinned " : "swskinned " ); + pSpewBuf->Printf( "tris: %d ", numTris ); + pSpewBuf->Printf( "bone changes: %d bones/strip: %d\n", pStripData->numBoneStateChanges, + ( int )pStripData->numBones ); + } + pResults->m_ActualTriCount += numTris * numPasses; + } + else if( pStripData->flags & OptimizedModel::STRIP_IS_TRISTRIP ) + { + Assert( 0 ); // FIXME: fill this in when we start using strips again. + } + else + { + Assert( 0 ); + } + } + } + } + } + if( pSpewBuf ) + { + char nil = '\0'; + pSpewBuf->Put( &nil, 1 );; + } + + pResults->m_NumHardwareBones = 0; + for( i = 0; i < info.m_pStudioHdr->numbones; i++ ) + { + if( hardwareBonesUsed[i] ) + { + pResults->m_NumHardwareBones++; + } + } +} + + +//----------------------------------------------------------------------------- +// Begin/end frame +//----------------------------------------------------------------------------- +static ConVar r_hwmorph( "r_hwmorph", "1", FCVAR_CHEAT ); + +void CStudioRenderContext::BeginFrame( void ) +{ + // Cache a few values here so I don't have to in software inner loops: + Assert( g_pMaterialSystemHardwareConfig ); + m_RC.m_Config.m_bSupportsVertexAndPixelShaders = g_pMaterialSystemHardwareConfig->SupportsVertexAndPixelShaders(); + m_RC.m_Config.m_bSupportsOverbright = g_pMaterialSystemHardwareConfig->SupportsOverbright(); + m_RC.m_Config.m_bEnableHWMorph = r_hwmorph.GetInt() != 0; + + // Haven't implemented the hw morph with threading yet + if ( g_pMaterialSystem->GetThreadMode() != MATERIAL_SINGLE_THREADED ) + { + m_RC.m_Config.m_bEnableHWMorph = false; + } + + m_RC.m_Config.m_bStatsMode = false; + + g_pStudioRenderImp->PrecacheGlint(); +} + +void CStudioRenderContext::EndFrame( void ) +{ +} + + +//----------------------------------------------------------------------------- +// Methods related to config +//----------------------------------------------------------------------------- +void CStudioRenderContext::UpdateConfig( const StudioRenderConfig_t& config ) +{ + memcpy( 
&m_RC.m_Config, &config, sizeof( StudioRenderConfig_t ) ); +} + +void CStudioRenderContext::GetCurrentConfig( StudioRenderConfig_t& config ) +{ + memcpy( &config, &m_RC.m_Config, sizeof( StudioRenderConfig_t ) ); +} + + +//----------------------------------------------------------------------------- +// Material overrides +//----------------------------------------------------------------------------- +void CStudioRenderContext::ForcedMaterialOverride( IMaterial *newMaterial, OverrideType_t nOverrideType ) +{ + m_RC.m_pForcedMaterial = newMaterial; + m_RC.m_nForcedMaterialType = nOverrideType; +} + +//----------------------------------------------------------------------------- +// Return the material overrides +//----------------------------------------------------------------------------- +void CStudioRenderContext::GetMaterialOverride( IMaterial** ppOutForcedMaterial, OverrideType_t* pOutOverrideType ) +{ + Assert( ppOutForcedMaterial != NULL && pOutOverrideType != NULL ); + *ppOutForcedMaterial = m_RC.m_pForcedMaterial; + *pOutOverrideType = m_RC.m_nForcedMaterialType; +} + +//----------------------------------------------------------------------------- +// Sets the view state +//----------------------------------------------------------------------------- +void CStudioRenderContext::SetViewState( const Vector& viewOrigin, + const Vector& viewRight, const Vector& viewUp, const Vector& viewPlaneNormal ) +{ + VectorCopy( viewOrigin, m_RC.m_ViewOrigin ); + VectorCopy( viewRight, m_RC.m_ViewRight ); + VectorCopy( viewUp, m_RC.m_ViewUp ); + VectorCopy( viewPlaneNormal, m_RC.m_ViewPlaneNormal ); +} + + +//----------------------------------------------------------------------------- +// Sets lighting state +//----------------------------------------------------------------------------- +void CStudioRenderContext::SetAmbientLightColors( const Vector *pColors ) +{ + for( int i = 0; i < 6; i++ ) + { + VectorCopy( pColors[i], m_RC.m_LightBoxColors[i].AsVector3D() ); + 
m_RC.m_LightBoxColors[i][3] = 1.0f; + } + + // FIXME: Would like to get this into the render thread, but there's systemic confusion + // about whether to set lighting state here or in the material system + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + pRenderContext->SetAmbientLightCube( m_RC.m_LightBoxColors ); +} + +void CStudioRenderContext::SetAmbientLightColors( const Vector4D *pColors ) +{ + memcpy( m_RC.m_LightBoxColors, pColors, 6 * sizeof(Vector4D) ); + + // FIXME: Would like to get this into the render thread, but there's systemic confusion + // about whether to set lighting state here or in the material system + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + pRenderContext->SetAmbientLightCube( m_RC.m_LightBoxColors ); +} + +void CStudioRenderContext::SetLocalLights( int nLightCount, const LightDesc_t *pLights ) +{ + m_RC.m_NumLocalLights = CopyLocalLightingState( MAXLOCALLIGHTS, m_RC.m_LocalLights, nLightCount, pLights ); + + // FIXME: Would like to get this into the render thread, but there's systemic confusion + // about whether to set lighting state here or in the material system + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + if ( m_RC.m_Config.bSoftwareLighting || m_RC.m_NumLocalLights == 0 ) + { + pRenderContext->DisableAllLocalLights(); + } + else + { + int i; + int nMaxLightCount = g_pMaterialSystemHardwareConfig->MaxNumLights(); + int nLightCount = min( m_RC.m_NumLocalLights, nMaxLightCount ); + for( i = 0; i < nLightCount; i++ ) + { + pRenderContext->SetLight( i, m_RC.m_LocalLights[i] ); + } + for( ; i < nMaxLightCount; i++ ) + { + LightDesc_t desc; + desc.m_Type = MATERIAL_LIGHT_DISABLE; + pRenderContext->SetLight( i, desc ); + } + } +} + + +//----------------------------------------------------------------------------- +// Sets the color modulation +//----------------------------------------------------------------------------- +void CStudioRenderContext::SetColorModulation( const float* pColor ) +{ + 
VectorCopy( pColor, m_RC.m_ColorMod ); +} + +void CStudioRenderContext::SetAlphaModulation( float alpha ) +{ + m_RC.m_AlphaMod = alpha; +} + + +//----------------------------------------------------------------------------- +// Used to set bone-to-world transforms. +// FIXME: Should this be a lock/unlock pattern so we can't read after unlock? +//----------------------------------------------------------------------------- +matrix3x4_t* CStudioRenderContext::LockBoneMatrices( int nCount ) +{ + MEM_ALLOC_CREDIT_( "CStudioRenderContext::m_BoneToWorldMatrices" ); + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + CMatRenderData<matrix3x4_t> rdMatrix( pRenderContext ); + matrix3x4_t *pDest = rdMatrix.Lock( nCount ); + return pDest; +} + +void CStudioRenderContext::UnlockBoneMatrices() +{ +} + + +//----------------------------------------------------------------------------- +// Allocates flex weights +//----------------------------------------------------------------------------- +void CStudioRenderContext::LockFlexWeights( int nWeightCount, float **ppFlexWeights, float **ppFlexDelayedWeights ) +{ + MEM_ALLOC_CREDIT_( "CStudioRenderContext::m_FlexWeights" ); + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + CMatRenderData<float> rdFlex( pRenderContext ); + CMatRenderData<float> rdFlexDelayed( pRenderContext ); + float *pFlexOut = rdFlex.Lock( nWeightCount ); + for ( int i = 0; i < nWeightCount; i++ ) + { + pFlexOut[i] = 0.0f; + } + *ppFlexWeights = pFlexOut; + if ( ppFlexDelayedWeights ) + { + pFlexOut = rdFlexDelayed.Lock( nWeightCount ); + for ( int i = 0; i < nWeightCount; i++ ) + { + pFlexOut[i] = 0.0f; + } + *ppFlexDelayedWeights = pFlexOut; + } +} + +void CStudioRenderContext::UnlockFlexWeights() +{ +} + + +//----------------------------------------------------------------------------- +// Methods related to flex weights +//----------------------------------------------------------------------------- +static ConVar r_randomflex( 
"r_randomflex", "0", FCVAR_CHEAT ); + + +//----------------------------------------------------------------------------- +// This will generate random flex data that has a specified # of non-zero values +//----------------------------------------------------------------------------- +void CStudioRenderContext::GenerateRandomFlexWeights( int nWeightCount, float* pWeights, float *pDelayedWeights ) +{ + int nRandomFlex = r_randomflex.GetInt(); + if ( nRandomFlex <= 0 || !pWeights ) + return; + + if ( nRandomFlex > nWeightCount ) + { + nRandomFlex = nWeightCount; + } + + int *pIndices = (int*)_alloca( nWeightCount * sizeof(int) ); + for ( int i = 0; i < nWeightCount; ++i ) + { + pIndices[i] = i; + } + + // Shuffle + for ( int i = 0; i < nWeightCount; ++i ) + { + int n = RandomInt( 0, nWeightCount-1 ); + int nTemp = pIndices[n]; + pIndices[n] = pIndices[i]; + pIndices[i] = nTemp; + } + + memset( pWeights, 0, nWeightCount * sizeof(float) ); + for ( int i = 0; i < nRandomFlex; ++i ) + { + pWeights[ pIndices[i] ] = RandomFloat( 0.0f, 1.0f ); + } + if ( pDelayedWeights ) + { + memset( pDelayedWeights, 0, nWeightCount * sizeof(float) ); + for ( int i = 0; i < nRandomFlex; ++i ) + { + pDelayedWeights[ pIndices[i] ] = RandomFloat( 0.0f, 1.0f ); + } + } +} + + +//----------------------------------------------------------------------------- +// Computes LOD +//----------------------------------------------------------------------------- +int CStudioRenderContext::ComputeRenderLOD( IMatRenderContext *pRenderContext, + const DrawModelInfo_t& info, const Vector &origin, float *pMetric ) +{ + int lod = info.m_Lod; + int lastlod = info.m_pHardwareData->m_NumLODs - 1; + + if ( pMetric ) + { + *pMetric = 0.0f; + } + + if ( lod == USESHADOWLOD ) + return lastlod; + + if ( lod != -1 ) + return clamp( lod, info.m_pHardwareData->m_RootLOD, lastlod ); + + float screenSize = pRenderContext->ComputePixelWidthOfSphere( origin, 0.5f ); + lod = ComputeModelLODAndMetric( info.m_pHardwareData, 
screenSize, pMetric ); + + // make sure we have a valid lod + if ( info.m_pStudioHdr->flags & STUDIOHDR_FLAGS_HASSHADOWLOD ) + { + lastlod--; + } + + lod = clamp( lod, info.m_pHardwareData->m_RootLOD, lastlod ); + return lod; +} + + +//----------------------------------------------------------------------------- +// This invokes proxies of all materials that are queued to be rendered +// It has the effect of ensuring the material vars are in the correct state +// since material var sets generated by the proxy bind are queued. +//----------------------------------------------------------------------------- +void CStudioRenderContext::InvokeBindProxies( const DrawModelInfo_t &info ) +{ + if ( m_RC.m_pForcedMaterial ) + { + if ( m_RC.m_nForcedMaterialType == OVERRIDE_NORMAL && m_RC.m_pForcedMaterial->HasProxy() ) + { + m_RC.m_pForcedMaterial->CallBindProxy( info.m_pClientEntity ); + } + return; + } + + // get skinref array + int nSkin = ( m_RC.m_Config.skin > 0 ) ? m_RC.m_Config.skin : info.m_Skin; + short *pSkinRef = info.m_pStudioHdr->pSkinref( 0 ); + if ( nSkin > 0 && nSkin < info.m_pStudioHdr->numskinfamilies ) + { + pSkinRef += ( nSkin * info.m_pStudioHdr->numskinref ); + } + + // This is used to ensure proxies are only called once + int nBufSize = info.m_pStudioHdr->numtextures * sizeof(bool); + bool *pProxyCalled = (bool*)stackalloc( nBufSize ); + memset( pProxyCalled, 0, nBufSize ); + + IMaterial **ppMaterials = info.m_pHardwareData->m_pLODs[ info.m_Lod ].ppMaterials; + mstudiomodel_t *pModel; + for ( int i=0 ; i < info.m_pStudioHdr->numbodyparts; ++i ) + { + R_StudioSetupModel( i, info.m_Body, &pModel, info.m_pStudioHdr ); + for ( int somethingOtherThanI = 0; somethingOtherThanI < pModel->nummeshes; ++somethingOtherThanI) + { + mstudiomesh_t *pMesh = pModel->pMesh(somethingOtherThanI); + int nMaterialIndex = pSkinRef[ pMesh->material ]; + if ( pProxyCalled[ nMaterialIndex ] ) + continue; + pProxyCalled[ nMaterialIndex ] = true; + IMaterial* pMaterial = 
ppMaterials[ nMaterialIndex ]; + if ( pMaterial && pMaterial->HasProxy() ) + { + pMaterial->CallBindProxy( info.m_pClientEntity ); + } + } + } +} + + +//----------------------------------------------------------------------------- +// Draws a model +//----------------------------------------------------------------------------- +void CStudioRenderContext::DrawModel( DrawModelResults_t *pResults, const DrawModelInfo_t& info, + matrix3x4_t *pBoneToWorld, float *pFlexWeights, float *pFlexDelayedWeights, const Vector &origin, int flags ) +{ + // Set to zero in case we don't render anything. + if ( pResults ) + { + pResults->m_ActualTriCount = pResults->m_TextureMemoryBytes = 0; + } + + if( !info.m_pStudioHdr || !info.m_pHardwareData || + !info.m_pHardwareData->m_NumLODs || !info.m_pHardwareData->m_pLODs ) + { + return; + } + + // Replace the flex weight data with random data for testing + GenerateRandomFlexWeights( info.m_pStudioHdr->numflexdesc, pFlexWeights, pFlexDelayedWeights ); + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + float flMetric; + const_cast<DrawModelInfo_t*>( &info )->m_Lod = ComputeRenderLOD( pRenderContext, info, origin, &flMetric ); + if ( pResults ) + { + pResults->m_nLODUsed = info.m_Lod; + pResults->m_flLODMetric = flMetric; + } + + MaterialLock_t hLock = 0; + if ( flags & STUDIORENDER_DRAW_ACCURATETIME ) + { + VPROF("STUDIORENDER_DRAW_ACCURATETIME"); + + // Flush the material system before timing this model: + hLock = g_pMaterialSystem->Lock(); + g_pMaterialSystem->Flush(true); + } + + if ( pResults ) + { + pResults->m_RenderTime.Start(); + } + + FlexWeights_t flex; + flex.m_pFlexWeights = pFlexWeights ? pFlexWeights : s_pZeroFlexWeights; + flex.m_pFlexDelayedWeights = pFlexDelayedWeights ? 
pFlexDelayedWeights : flex.m_pFlexWeights; + + ICallQueue *pCallQueue = pRenderContext->GetCallQueue(); + if ( !pCallQueue || studio_queue_mode.GetInt() == 0 ) + { + g_pStudioRenderImp->DrawModel( info, m_RC, pBoneToWorld, flex, flags ); + } + else + { + CMatRenderData<matrix3x4_t> rdMatrix( pRenderContext, info.m_pStudioHdr->numbones, pBoneToWorld ); + CMatRenderData<float> rdFlex( pRenderContext ); + CMatRenderData<float> rdFlexDelayed( pRenderContext ); + + InvokeBindProxies( info ); + pBoneToWorld = rdMatrix.Base(); + if ( info.m_pStudioHdr->numflexdesc != 0 ) + { + rdFlex.Lock( info.m_pStudioHdr->numflexdesc, flex.m_pFlexWeights ); + flex.m_pFlexWeights = rdFlex.Base(); + if ( !pFlexDelayedWeights ) + { + flex.m_pFlexDelayedWeights = flex.m_pFlexWeights; + } + else + { + rdFlexDelayed.Lock( info.m_pStudioHdr->numflexdesc, flex.m_pFlexDelayedWeights ); + flex.m_pFlexDelayedWeights = rdFlexDelayed.Base(); + } + } + pCallQueue->QueueCall( g_pStudioRenderImp, &CStudioRender::DrawModel, info, m_RC, pBoneToWorld, flex, flags ); + } + + if( flags & STUDIORENDER_DRAW_ACCURATETIME ) + { + VPROF( "STUDIORENDER_DRAW_ACCURATETIME" ); + + // Make sure this model is completely drawn before ending the timer: + g_pMaterialSystem->Flush(true); + g_pMaterialSystem->Flush(true); + g_pMaterialSystem->Unlock( hLock ); + } + + if ( pResults ) + { + pResults->m_RenderTime.End(); + if( flags & STUDIORENDER_DRAW_GET_PERF_STATS ) + { + GetPerfStats( pResults, info, 0 ); + } + } +} + + +void CStudioRenderContext::DrawModelArray( const DrawModelInfo_t &drawInfo, int arrayCount, model_array_instance_t *pInstanceData, int instanceStride, int flags ) +{ + // UNDONE: Support queue mode? 
+ g_pStudioRenderImp->DrawModelArray( drawInfo, m_RC, arrayCount, pInstanceData, instanceStride, flags ); +} + +//----------------------------------------------------------------------------- +// Methods related to rendering static props +//----------------------------------------------------------------------------- +void CStudioRenderContext::DrawModelStaticProp( const DrawModelInfo_t& info, const matrix3x4_t &modelToWorld, int flags ) +{ + if ( info.m_Lod < info.m_pHardwareData->m_RootLOD ) + { + const_cast< DrawModelInfo_t* >( &info )->m_Lod = info.m_pHardwareData->m_RootLOD; + } + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + ICallQueue *pCallQueue = pRenderContext->GetCallQueue(); + if ( !pCallQueue || studio_queue_mode.GetInt() == 0 ) + { + g_pStudioRenderImp->DrawModelStaticProp( info, m_RC, modelToWorld, flags ); + } + else + { + InvokeBindProxies( info ); + pCallQueue->QueueCall( g_pStudioRenderImp, &CStudioRender::DrawModelStaticProp, info, m_RC, modelToWorld, flags ); + } +} + +void CStudioRenderContext::DrawStaticPropDecals( const DrawModelInfo_t &info, const matrix3x4_t &modelToWorld ) +{ + QUEUE_STUDIORENDER_CALL( DrawStaticPropDecals, CStudioRender, g_pStudioRenderImp, info, m_RC, modelToWorld ); +} + +void CStudioRenderContext::DrawStaticPropShadows( const DrawModelInfo_t &info, const matrix3x4_t &modelToWorld, int flags ) +{ + QUEUE_STUDIORENDER_CALL( DrawStaticPropShadows, CStudioRender, g_pStudioRenderImp, info, m_RC, modelToWorld, flags ); +} + + +//----------------------------------------------------------------------------- +// Methods related to shadows +//----------------------------------------------------------------------------- +void CStudioRenderContext::AddShadow( IMaterial* pMaterial, void* pProxyData, + FlashlightState_t *pFlashlightState, VMatrix *pWorldToTexture, ITexture *pFlashlightDepthTexture ) +{ + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + ICallQueue *pCallQueue = 
pRenderContext->GetCallQueue(); + if ( !pCallQueue || studio_queue_mode.GetInt() == 0 ) + { + g_pStudioRenderImp->AddShadow( pMaterial, pProxyData, pFlashlightState, pWorldToTexture, pFlashlightDepthTexture ); + } + else + { + // NOTE: We don't need to make proxies work, because proxies are only ever used + // when casting shadows onto props, which we don't do..that feature is disabled. + // When casting flashlights onto mdls, which we *do* use, the proxy is NULL. + Assert( pProxyData == NULL ); + if ( pProxyData != NULL ) + { + Warning( "Cannot call CStudioRenderContext::AddShadows w/ proxies in queued mode!\n" ); + return; + } + + CMatRenderData< FlashlightState_t > rdFlashlight( pRenderContext, 1, pFlashlightState ); + CMatRenderData< VMatrix > rdMatrix( pRenderContext, 1, pWorldToTexture ); + pCallQueue->QueueCall( g_pStudioRenderImp, &CStudioRender::AddShadow, pMaterial, + (void*)NULL, rdFlashlight.Base(), rdMatrix.Base(), pFlashlightDepthTexture ); + } +} + +void CStudioRenderContext::ClearAllShadows() +{ + QUEUE_STUDIORENDER_CALL( ClearAllShadows, CStudioRender, g_pStudioRenderImp ); +} + + +//----------------------------------------------------------------------------- +// Methods related to decals +//----------------------------------------------------------------------------- +void CStudioRenderContext::DestroyDecalList( StudioDecalHandle_t handle ) +{ + QUEUE_STUDIORENDER_CALL( DestroyDecalList, CStudioRender, g_pStudioRenderImp, handle ); +} + +void CStudioRenderContext::AddDecal( StudioDecalHandle_t handle, studiohdr_t *pStudioHdr, + matrix3x4_t *pBoneToWorld, const Ray_t& ray, const Vector& decalUp, + IMaterial* pDecalMaterial, float radius, int body, bool noPokethru, int maxLODToDecal ) +{ + // This substitution always has to be done in the main thread, so do it here. 
+ pDecalMaterial = GetModelSpecificDecalMaterial( pDecalMaterial ); + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + Assert( pRenderContext->IsRenderData( pBoneToWorld ) ); + QUEUE_STUDIORENDER_CALL_RC( AddDecal, CStudioRender, g_pStudioRenderImp, pRenderContext, + handle, m_RC, pBoneToWorld, pStudioHdr, ray, decalUp, pDecalMaterial, radius, + body, noPokethru, maxLODToDecal ); +} + +// Function to do replacement because we always need to do this from the main thread. +IMaterial* GetModelSpecificDecalMaterial( IMaterial* pDecalMaterial ) +{ + Assert( ThreadInMainThread() ); + // Since we're adding this to a studio model, check the decal to see if + // there's an alternate form used for static props... + bool found; + IMaterialVar* pModelMaterialVar = pDecalMaterial->FindVar( "$modelmaterial", &found, false ); + if ( found ) + { + IMaterial* pModelMaterial = g_pMaterialSystem->FindMaterial( pModelMaterialVar->GetStringValue(), TEXTURE_GROUP_DECAL, false ); + if ( !IsErrorMaterial( pModelMaterial ) ) + { + return pModelMaterial; + } + } + + return pDecalMaterial; +} + + diff --git a/studiorender/studiorendercontext.h b/studiorender/studiorendercontext.h new file mode 100644 index 0000000..c5e654b --- /dev/null +++ b/studiorender/studiorendercontext.h @@ -0,0 +1,246 @@ +//========= Copyright Valve Corporation, All rights reserved. 
============// +// +// Purpose: +// +//===========================================================================// + +#ifndef STUDIORENDERCONTEXT_H +#define STUDIORENDERCONTEXT_H +#ifdef _WIN32 +#pragma once +#endif + +#include "istudiorender.h" +#include "tier3/tier3.h" +#include "studio.h" +#include "tier1/delegates.h" +#include "tier1/memstack.h" +#include "studiorender.h" + + +//----------------------------------------------------------------------------- +// Forward declarations +//----------------------------------------------------------------------------- +class IStudioDataCache; +class CStudioRender; + + +//----------------------------------------------------------------------------- +// Global interfaces +//----------------------------------------------------------------------------- +extern IStudioDataCache *g_pStudioDataCache; +extern CStudioRender *g_pStudioRenderImp; + +IMaterial* GetModelSpecificDecalMaterial( IMaterial* pDecalMaterial ); + +//----------------------------------------------------------------------------- +// Internal config structure +//----------------------------------------------------------------------------- +struct StudioRenderConfigInternal_t : public StudioRenderConfig_t +{ + bool m_bSupportsVertexAndPixelShaders : 1; + bool m_bSupportsOverbright : 1; + bool m_bEnableHWMorph : 1; + bool m_bStatsMode : 1; +}; + + +//----------------------------------------------------------------------------- +// All the data needed to render a studiomodel +//----------------------------------------------------------------------------- +struct FlexWeights_t +{ + float *m_pFlexWeights; + float *m_pFlexDelayedWeights; +}; + +struct StudioRenderContext_t +{ + StudioRenderConfigInternal_t m_Config; + Vector m_ViewTarget; + Vector m_ViewOrigin; + Vector m_ViewRight; + Vector m_ViewUp; + Vector m_ViewPlaneNormal; + Vector4D m_LightBoxColors[6]; + LightDesc_t m_LocalLights[MAXLOCALLIGHTS]; + int m_NumLocalLights; + float m_ColorMod[3]; + float 
m_AlphaMod; + IMaterial* m_pForcedMaterial; + OverrideType_t m_nForcedMaterialType; +}; + + +//----------------------------------------------------------------------------- +// Helper to queue up calls if necessary +//----------------------------------------------------------------------------- +#define QUEUE_STUDIORENDER_CALL( FuncName, ClassName, pObject, ... ) \ + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); \ + ICallQueue *pCallQueue = pRenderContext->GetCallQueue(); \ + if ( !pCallQueue || studio_queue_mode.GetInt() == 0 ) \ + { \ + pObject->FuncName( __VA_ARGS__ ); \ + } \ + else \ + { \ + pCallQueue->QueueCall( pObject, &ClassName::FuncName, ##__VA_ARGS__ ); \ + } + +#define QUEUE_STUDIORENDER_CALL_RC( FuncName, ClassName, pObject, pRenderContext, ... ) \ + ICallQueue *pCallQueue = pRenderContext->GetCallQueue(); \ + if ( !pCallQueue || studio_queue_mode.GetInt() == 0 ) \ + { \ + pObject->FuncName( __VA_ARGS__ ); \ + } \ + else \ + { \ + pCallQueue->QueueCall( pObject, &ClassName::FuncName, ##__VA_ARGS__ ); \ + } + + +//----------------------------------------------------------------------------- +// Implementation of IStudioRender +//----------------------------------------------------------------------------- +class CStudioRenderContext : public CTier3AppSystem< IStudioRender > +{ + typedef CTier3AppSystem< IStudioRender > BaseClass; + + // Methods of IAppSystem +public: + virtual bool Connect( CreateInterfaceFn factory ); + virtual void Disconnect(); + virtual void *QueryInterface( const char *pInterfaceName ); + virtual InitReturnVal_t Init(); + virtual void Shutdown(); + + // Methods of IStudioRender +public: + virtual void BeginFrame( void ); + virtual void EndFrame( void ); + virtual void Mat_Stub( IMaterialSystem *pMatSys ); + virtual void UpdateConfig( const StudioRenderConfig_t& config ); + virtual void GetCurrentConfig( StudioRenderConfig_t& config ); + virtual bool LoadModel(studiohdr_t *pStudioHdr, void *pVtxData, studiohwdata_t 
*pHardwareData); + virtual void UnloadModel( studiohwdata_t *pHardwareData ); + virtual void RefreshStudioHdr( studiohdr_t* pStudioHdr, studiohwdata_t* pHardwareData ); + virtual void SetEyeViewTarget( const studiohdr_t *pStudioHdr, int nBodyIndex, const Vector& worldPosition ); + virtual void SetAmbientLightColors( const Vector *pAmbientOnlyColors ); + virtual void SetAmbientLightColors( const Vector4D *pAmbientOnlyColors ); + virtual void SetLocalLights( int numLights, const LightDesc_t *pLights ); + virtual int GetNumAmbientLightSamples(); + virtual const Vector *GetAmbientLightDirections(); + virtual void SetViewState( const Vector& viewOrigin, const Vector& viewRight, const Vector& viewUp, const Vector& viewPlaneNormal ); + virtual int GetNumLODs( const studiohwdata_t &hardwareData ) const; + virtual float GetLODSwitchValue( const studiohwdata_t &hardwareData, int lod ) const; + virtual void SetLODSwitchValue( studiohwdata_t &hardwareData, int lod, float switchValue ); + virtual void SetColorModulation( const float* pColor ); + virtual void SetAlphaModulation( float alpha ); + virtual void DrawModel( DrawModelResults_t *pResults, const DrawModelInfo_t& info, matrix3x4_t *pCustomBoneToWorld, float *pFlexWeights, float *pFlexDelayedWeights, const Vector& origin, int flags = STUDIORENDER_DRAW_ENTIRE_MODEL ); + virtual void DrawModelArray( const DrawModelInfo_t &drawInfo, int arrayCount, model_array_instance_t *pInstanceData, int instanceStride, int flags = STUDIORENDER_DRAW_ENTIRE_MODEL ); + virtual void DrawModelStaticProp( const DrawModelInfo_t& info, const matrix3x4_t &modelToWorld, int flags = STUDIORENDER_DRAW_ENTIRE_MODEL ); + virtual void DrawStaticPropDecals( const DrawModelInfo_t &drawInfo, const matrix3x4_t &modelToWorld ); + virtual void DrawStaticPropShadows( const DrawModelInfo_t &drawInfo, const matrix3x4_t &modelToWorld, int flags ); + virtual void ForcedMaterialOverride( IMaterial *newMaterial, OverrideType_t nOverrideType = OVERRIDE_NORMAL ); + 
DELEGATE_TO_OBJECT_1( StudioDecalHandle_t, CreateDecalList, studiohwdata_t *, g_pStudioRenderImp ); + virtual void DestroyDecalList( StudioDecalHandle_t handle ); + virtual void AddDecal( StudioDecalHandle_t handle, studiohdr_t *pStudioHdr, matrix3x4_t *pBoneToWorld, const Ray_t & ray, const Vector& decalUp, IMaterial* pDecalMaterial, float radius, int body, bool noPokethru, int maxLODToDecal = ADDDECAL_TO_ALL_LODS ); + virtual void ComputeLighting( const Vector* pAmbient, int lightCount, LightDesc_t* pLights, const Vector& pt, const Vector& normal, Vector& lighting ); + virtual void ComputeLightingConstDirectional( const Vector* pAmbient, int lightCount, LightDesc_t* pLights, const Vector& pt, const Vector& normal, Vector& lighting, float flDirectionalAmount ); + virtual void AddShadow( IMaterial* pMaterial, void* pProxyData, FlashlightState_t *pFlashlightState, VMatrix *pWorldToTexture, ITexture *pFlashlightDepthTexture ); + virtual void ClearAllShadows(); + virtual int ComputeModelLod( studiohwdata_t* pHardwareData, float flUnitSphereSize, float *pMetric = NULL ); + virtual void GetPerfStats( DrawModelResults_t *pResults, const DrawModelInfo_t &info, CUtlBuffer *pSpewBuf = NULL ) const; + virtual void GetTriangles( const DrawModelInfo_t& info, matrix3x4_t *pBoneToWorld, GetTriangles_Output_t &out ); + virtual int GetMaterialList( studiohdr_t *pStudioHdr, int count, IMaterial** ppMaterials ); + virtual int GetMaterialListFromBodyAndSkin( MDLHandle_t studio, int nSkin, int nBody, int nCountOutputMaterials, IMaterial** ppOutputMaterials ); + virtual matrix3x4_t* LockBoneMatrices( int nCount ); + virtual void UnlockBoneMatrices(); + virtual void LockFlexWeights( int nWeightCount, float **ppFlexWeights, float **ppFlexDelayedWeights = NULL ); + virtual void UnlockFlexWeights(); + virtual void GetMaterialOverride( IMaterial** ppOutForcedMaterial, OverrideType_t* pOutOverrideType ); + + // Other public methods +public: + CStudioRenderContext(); + virtual 
~CStudioRenderContext(); + +private: + // Load, unload materials + void LoadMaterials( studiohdr_t *phdr, OptimizedModel::FileHeader_t *, studioloddata_t &lodData, int lodID ); + + // Determines material flags + void ComputeMaterialFlags( studiohdr_t *phdr, studioloddata_t &lodData, IMaterial *pMaterial ); + + // Creates, destroys static meshes + void R_StudioCreateStaticMeshes( studiohdr_t *pStudioHdr, OptimizedModel::FileHeader_t* pVtxHdr, + studiohwdata_t *pStudioHWData, int lodID, int *pColorMeshID ); + void R_StudioCreateSingleMesh( studiohdr_t *pStudioHdr, studioloddata_t *pStudioLodData, + mstudiomesh_t* pMesh, OptimizedModel::MeshHeader_t* pVtxMesh, int numBones, + studiomeshdata_t* pMeshData, int *pColorMeshID ); + void R_StudioDestroyStaticMeshes( int numStudioMeshes, studiomeshdata_t **ppStudioMeshes ); + + // Determine if any strip groups shouldn't be morphed + void DetermineHWMorphing( mstudiomodel_t *pModel, OptimizedModel::ModelLODHeader_t *pVtxLOD ); + + // Count deltas affecting a particular stripgroup + int CountDeltaFlexedStripGroups( mstudiomodel_t *pModel, OptimizedModel::ModelLODHeader_t *pVtxLOD ); + + // Count vertices affected by deltas in a particular strip group + int CountFlexedVertices( mstudiomesh_t* pMesh, OptimizedModel::StripGroupHeader_t* pStripGroup ); + + // Builds morph data + void R_StudioBuildMorph( studiohdr_t *pStudioHdr, studiomeshgroup_t* pMeshGroup, mstudiomesh_t* pMesh, + OptimizedModel::StripGroupHeader_t *pStripGroup ); + + // Builds the decal bone remap for a particular mesh + void ComputeHWMorphDecalBoneRemap( studiohdr_t *pStudioHdr, OptimizedModel::FileHeader_t *pVtxHdr, studiohwdata_t *pStudioHWData, int nLOD ); + void BuildDecalBoneMap( studiohdr_t *pStudioHdr, int *pUsedBones, int *pBoneRemap, int *pMaxBoneCount, mstudiomesh_t* pMesh, OptimizedModel::StripGroupHeader_t* pStripGroup ); + + // Helper methods used to construct static meshes + int GetNumBoneWeights( const OptimizedModel::StripGroupHeader_t *pGroup 
); + VertexFormat_t CalculateVertexFormat( const studiohdr_t *pStudioHdr, const studioloddata_t *pStudioLodData, + const mstudiomesh_t* pMesh, OptimizedModel::StripGroupHeader_t *pGroup, bool bIsHwSkinned ); + bool MeshNeedsTangentSpace( studiohdr_t *pStudioHdr, studioloddata_t *pStudioLodData, mstudiomesh_t* pMesh ); + void R_StudioBuildMeshGroup( const char *pModelName, bool bNeedsTangentSpace, studiomeshgroup_t* pMeshGroup, + OptimizedModel::StripGroupHeader_t *pStripGroup, mstudiomesh_t* pMesh, + studiohdr_t *pStudioHdr, VertexFormat_t vertexFormat ); + void R_StudioBuildMeshStrips( studiomeshgroup_t* pMeshGroup, + OptimizedModel::StripGroupHeader_t *pStripGroup ); + template <VertexCompressionType_t T> bool R_AddVertexToMesh( const char *pModelName, bool bNeedsTangentSpace, CMeshBuilder& meshBuilder, + OptimizedModel::Vertex_t* pVertex, mstudiomesh_t* pMesh, const mstudio_meshvertexdata_t *vertData, bool hwSkin ); + + // This will generate random flex data that has a specified # of non-zero values + void GenerateRandomFlexWeights( int nWeightCount, float* pWeights, float *pDelayedWeights ); + + // Computes LOD + int ComputeRenderLOD( IMatRenderContext *pRenderContext, const DrawModelInfo_t& info, const Vector &origin, float *pMetric ); + + // This invokes proxies of all materials that are queued to be rendered + void InvokeBindProxies( const DrawModelInfo_t &info ); + + // Did this matrix come from our allocator? + bool IsInternallyAllocated( const matrix3x4_t *pBoneToWorld ); + + // Did these flex weights come from our allocator? 
+ bool IsInternallyAllocated( const float *pFlexWeights ); + +private: + StudioRenderContext_t m_RC; + + // Used by the lighting computation methods, + // this is only here to prevent constructors in lightpos_t from being repeatedly run + lightpos_t m_pLightPos[MAXLIGHTCOMPUTE]; +}; + + +//----------------------------------------------------------------------------- +// Inline methods +//----------------------------------------------------------------------------- +inline int CStudioRenderContext::ComputeModelLod( studiohwdata_t *pHardwareData, float flUnitSphereSize, float *pMetric ) +{ + return ComputeModelLODAndMetric( pHardwareData, flUnitSphereSize, pMetric ); +} + + +#endif // STUDIORENDERCONTEXT_H diff --git a/studiorender/xbox/xbox.def b/studiorender/xbox/xbox.def new file mode 100644 index 0000000..64f317e --- /dev/null +++ b/studiorender/xbox/xbox.def @@ -0,0 +1,3 @@ +LIBRARY StudioRender_360.dll +EXPORTS + CreateInterface @1 |