diff options
Diffstat (limited to 'materialsystem/shaderapidx9/dynamicvb.h')
| -rw-r--r-- | materialsystem/shaderapidx9/dynamicvb.h | 1098 |
1 files changed, 1098 insertions, 0 deletions
diff --git a/materialsystem/shaderapidx9/dynamicvb.h b/materialsystem/shaderapidx9/dynamicvb.h new file mode 100644 index 0000000..5eeee8f --- /dev/null +++ b/materialsystem/shaderapidx9/dynamicvb.h @@ -0,0 +1,1098 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//===========================================================================// + +#ifndef DYNAMICVB_H +#define DYNAMICVB_H + +#ifdef _WIN32 +#pragma once +#endif + +#include "locald3dtypes.h" +#include "recording.h" +#include "shaderapidx8_global.h" +#include "shaderapidx8.h" +#include "imeshdx8.h" +#include "materialsystem/ivballoctracker.h" +#include "gpubufferallocator.h" +#include "tier1/utllinkedlist.h" +#include "tier0/dbg.h" +#include "tier1/memstack.h" + +///////////////////////////// +// D. Sim Dietrich Jr. +////////////////////// + + +// Helper function to unbind an vertex buffer +void Unbind( IDirect3DVertexBuffer9 *pVertexBuffer ); + +#define X360_VERTEX_BUFFER_SIZE_MULTIPLIER 2.0 //minimum of 1, only affects dynamic buffers +//#define X360_BLOCK_ON_VB_FLUSH //uncomment to block until all data is consumed when a flush is requested. Otherwise we only block when absolutely necessary + +//#define SPEW_VERTEX_BUFFER_STALLS //uncomment to allow buffer stall spewing. + + +class CVertexBuffer +{ +public: + CVertexBuffer( IDirect3DDevice9 * pD3D, VertexFormat_t fmt, DWORD theFVF, int vertexSize, + int theVertexCount, const char *pTextureBudgetName, bool bSoftwareVertexProcessing, bool dynamic = false ); + +#ifdef _X360 + CVertexBuffer(); + void Init( IDirect3DDevice9 * pD3D, VertexFormat_t fmt, DWORD theFVF, uint8 *pVertexData, int vertexSize, int theVertexCount ); +#endif + + ~CVertexBuffer(); + + LPDIRECT3DVERTEXBUFFER GetInterface() const + { + // If this buffer still exists, then Late Creation didn't happen. Best case: we'll render the wrong image. Worst case: Crash. + Assert( !m_pSysmemBuffer ); + return m_pVB; + } + + // Use at beginning of frame to force a flush of VB contents on first draw + void FlushAtFrameStart() { m_bFlush = true; } + + // lock, unlock + unsigned char* Lock( int numVerts, int& baseVertexIndex ); + unsigned char* Modify( bool bReadOnly, int firstVertex, int numVerts ); + void Unlock( int numVerts ); + + void HandleLateCreation( ); + + // Vertex size + int VertexSize() const { return m_VertexSize; } + + // Vertex count + int VertexCount() const { return m_VertexCount; } +#ifdef _X360 + // For some VBs, memory allocation is managed by CGPUBufferAllocator, via ShaderAPI + const GPUBufferHandle_t *GetBufferAllocationHandle( void ); + void SetBufferAllocationHandle( const GPUBufferHandle_t &bufferAllocationHandle ); + bool IsPooled( void ) { creturn m_GPUBufferHandle.IsValid(); } + // Expose the data pointer for read-only CPU access to the data + // (double-indirection supports relocation of the data by CGPUBufferAllocator) + const byte **GetBufferDataPointerAddress( void ); +#endif // _X360 + + static int BufferCount() + { +#ifdef _DEBUG + return s_BufferCount; +#else + return 0; +#endif + } + + // UID + unsigned int UID() const + { +#ifdef RECORDING + return m_UID; +#else + return 0; +#endif + } + + void HandlePerFrameTextureStats( int frame ) + { +#ifdef VPROF_ENABLED + if ( m_Frame != frame && !m_bDynamic ) + { + m_Frame = frame; + m_pFrameCounter += m_nBufferSize; + } +#endif + } + + // Do we have enough room without discarding? + bool HasEnoughRoom( int numVertices ) const; + + // Is this dynamic? + bool IsDynamic() const { return m_bDynamic; } + bool IsExternal() const { return m_bExternalMemory; } + + // Block until this part of the vertex buffer is free + void BlockUntilUnused( int nBufferSize ); + + // used to alter the characteristics after creation + // allows one dynamic vb to be shared for multiple formats + void ChangeConfiguration( int vertexSize, int totalSize ) + { + Assert( m_bDynamic && !m_bLocked && vertexSize ); + m_VertexSize = vertexSize; + m_VertexCount = m_nBufferSize / vertexSize; + } + + // Compute the next offset for the next lock + int NextLockOffset( ) const; + + // Returns the allocated size + int AllocationSize() const; + + // Returns the number of vertices we have enough room for + int NumVerticesUntilFlush() const + { +#if defined( _X360 ) + if( m_AllocationRing.Count() ) + { + //Cycle through the ring buffer and see what memory is free now + int iNode = m_AllocationRing.Head(); + while( m_AllocationRing.IsValidIndex( iNode ) ) + { + if( Dx9Device()->IsFencePending( m_AllocationRing[iNode].m_Fence ) ) + break; + + iNode = m_AllocationRing.Next( iNode ); + } + + if( m_AllocationRing.IsValidIndex( iNode ) ) + { + int iEndFreeOffset = m_AllocationRing[iNode].m_iEndOffset; + if( iEndFreeOffset < m_Position ) + { + //Wrapped. Making the arbitrary decision that the return value for this function *should* handle the singe giant allocation case which requires contiguous memory + if( iEndFreeOffset > (m_iNextBlockingPosition - m_Position) ) + return iEndFreeOffset / m_VertexSize; + else + return (m_iNextBlockingPosition - m_Position) / m_VertexSize; + } + } + else + { + //we didn't block on any fence + return m_VertexCount; + } + } + + return m_VertexCount; +#else + return (m_nBufferSize - NextLockOffset()) / m_VertexSize; +#endif + } + + // Marks a fence indicating when this buffer was used + void MarkUsedInRendering() + { +#ifdef _X360 + if ( m_bDynamic && m_pVB ) + { + Assert( m_AllocationRing.Count() > 0 ); + m_AllocationRing[m_AllocationRing.Tail()].m_Fence = Dx9Device()->GetCurrentFence(); + } +#endif + } + +private: + void Create( IDirect3DDevice9 *pD3D ); + inline void ReallyUnlock( int unlockBytes ) + { + #if DX_TO_GL_ABSTRACTION + // Knowing how much data was actually written is critical for performance under OpenGL. + m_pVB->UnlockActualSize( unlockBytes ); + #else + unlockBytes; // Unused here + m_pVB->Unlock(); + #endif + } + + enum LOCK_FLAGS + { + LOCKFLAGS_FLUSH = D3DLOCK_NOSYSLOCK | D3DLOCK_DISCARD, +#if !defined( _X360 ) + LOCKFLAGS_APPEND = D3DLOCK_NOSYSLOCK | D3DLOCK_NOOVERWRITE +#else + // X360BUG: forcing all locks to gpu flush, otherwise bizarre mesh corruption on decals + // Currently iterating with microsoft 360 support to track source of gpu corruption + LOCKFLAGS_APPEND = D3DLOCK_NOSYSLOCK +#endif + }; + + LPDIRECT3DVERTEXBUFFER m_pVB; + +#ifdef _X360 + struct DynamicBufferAllocation_t + { + DWORD m_Fence; //track whether this memory is safe to use again. + int m_iStartOffset; + int m_iEndOffset; + unsigned int m_iZPassIdx; // The zpass during which this allocation was made + }; + + int m_iNextBlockingPosition; // m_iNextBlockingPosition >= m_Position where another allocation is still in use. + unsigned char *m_pAllocatedMemory; + int m_iAllocationSize; //Total size of the ring buffer, usually more than what was asked for + IDirect3DVertexBuffer9 m_D3DVertexBuffer; //Only need one shared D3D header for our usage patterns. + CUtlLinkedList<DynamicBufferAllocation_t> m_AllocationRing; //tracks what chunks of our memory are potentially still in use by D3D + + GPUBufferHandle_t m_GPUBufferHandle; // Handle to a memory allocation within a shared physical memory pool (see CGPUBufferAllocator) +#endif + + VertexFormat_t m_VertexBufferFormat; // yes, Vertex, only used for allocation tracking + int m_nBufferSize; + int m_Position; + int m_VertexCount; + int m_VertexSize; + DWORD m_TheFVF; + byte *m_pSysmemBuffer; + int m_nSysmemBufferStartBytes; + + uint m_nLockCount; + unsigned char m_bDynamic : 1; + unsigned char m_bLocked : 1; + unsigned char m_bFlush : 1; + unsigned char m_bExternalMemory : 1; + unsigned char m_bSoftwareVertexProcessing : 1; + unsigned char m_bLateCreateShouldDiscard : 1; + +#ifdef VPROF_ENABLED + int m_Frame; + int *m_pFrameCounter; + int *m_pGlobalCounter; +#endif + +#ifdef _DEBUG + static int s_BufferCount; +#endif + +#ifdef RECORDING + unsigned int m_UID; +#endif +}; + +#if defined( _X360 ) +#include "utlmap.h" +MEMALLOC_DECLARE_EXTERNAL_TRACKING( XMem_CVertexBuffer ); +#endif + +//----------------------------------------------------------------------------- +// constructor, destructor +//----------------------------------------------------------------------------- +inline CVertexBuffer::CVertexBuffer(IDirect3DDevice9 * pD3D, VertexFormat_t fmt, DWORD theFVF, + int vertexSize, int vertexCount, const char *pTextureBudgetName, + bool bSoftwareVertexProcessing, bool dynamic ) : + m_pVB(0), + m_Position(0), + m_VertexSize(vertexSize), + m_VertexCount(vertexCount), + m_bFlush(true), + m_bLocked(false), + m_bExternalMemory( false ), + m_nBufferSize(vertexSize * vertexCount), + m_TheFVF( theFVF ), + m_bSoftwareVertexProcessing( bSoftwareVertexProcessing ), + m_bDynamic(dynamic), + m_VertexBufferFormat( fmt ), + m_bLateCreateShouldDiscard( false ) +#ifdef _X360 + ,m_pAllocatedMemory(NULL) + ,m_iNextBlockingPosition(0) + ,m_iAllocationSize(0) +#endif +#ifdef VPROF_ENABLED + ,m_Frame( -1 ) +#endif +{ + MEM_ALLOC_CREDIT_( pTextureBudgetName ); + +#ifdef RECORDING + // assign a UID + static unsigned int uid = 0; + m_UID = uid++; +#endif + +#ifdef _DEBUG + ++s_BufferCount; +#endif + +#ifdef VPROF_ENABLED + if ( !m_bDynamic ) + { + char name[256]; + V_strcpy_safe( name, "TexGroup_global_" ); + V_strcat_safe( name, pTextureBudgetName, sizeof(name) ); + m_pGlobalCounter = g_VProfCurrentProfile.FindOrCreateCounter( name, COUNTER_GROUP_TEXTURE_GLOBAL ); + + V_strcpy_safe( name, "TexGroup_frame_" ); + V_strcat_safe( name, pTextureBudgetName, sizeof(name) ); + m_pFrameCounter = g_VProfCurrentProfile.FindOrCreateCounter( name, COUNTER_GROUP_TEXTURE_PER_FRAME ); + } + else + { + m_pGlobalCounter = g_VProfCurrentProfile.FindOrCreateCounter( "TexGroup_global_" TEXTURE_GROUP_DYNAMIC_VERTEX_BUFFER, COUNTER_GROUP_TEXTURE_GLOBAL ); + } +#endif + + if ( !g_pShaderUtil->IsRenderThreadSafe() ) + { + m_pSysmemBuffer = ( byte * )MemAlloc_AllocAligned( m_nBufferSize, 16 ); + m_nSysmemBufferStartBytes = 0; + } + else + { + m_pSysmemBuffer = NULL; + Create( pD3D ); + } + +#ifdef VPROF_ENABLED + if ( IsX360() || !m_bDynamic ) + { + Assert( m_pGlobalCounter ); + *m_pGlobalCounter += m_nBufferSize; + } +#endif +} + + +void CVertexBuffer::Create( IDirect3DDevice9 *pD3D ) +{ + D3DVERTEXBUFFER_DESC desc; + memset( &desc, 0x00, sizeof( desc ) ); + desc.Format = D3DFMT_VERTEXDATA; + desc.Size = m_nBufferSize; + desc.Type = D3DRTYPE_VERTEXBUFFER; + desc.Pool = m_bDynamic ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED; + desc.FVF = m_TheFVF; + +#if defined(IS_WINDOWS_PC) && defined(SHADERAPIDX9) + extern bool g_ShaderDeviceUsingD3D9Ex; + if ( g_ShaderDeviceUsingD3D9Ex ) + { + desc.Pool = D3DPOOL_DEFAULT; + } +#endif + + desc.Usage = D3DUSAGE_WRITEONLY; + if ( m_bDynamic ) + { + desc.Usage |= D3DUSAGE_DYNAMIC; + // Dynamic meshes should never be compressed (slows down writing to them) + Assert( CompressionType( m_TheFVF ) == VERTEX_COMPRESSION_NONE ); + } + if ( m_bSoftwareVertexProcessing ) + { + desc.Usage |= D3DUSAGE_SOFTWAREPROCESSING; + } + +#if !defined( _X360 ) + RECORD_COMMAND( DX8_CREATE_VERTEX_BUFFER, 6 ); + RECORD_INT( m_UID ); + RECORD_INT( m_nBufferSize ); + RECORD_INT( desc.Usage ); + RECORD_INT( desc.FVF ); + RECORD_INT( desc.Pool ); + RECORD_INT( m_bDynamic ); + + HRESULT hr = pD3D->CreateVertexBuffer( m_nBufferSize, desc.Usage, desc.FVF, desc.Pool, &m_pVB, NULL ); + + if ( hr == D3DERR_OUTOFVIDEOMEMORY || hr == E_OUTOFMEMORY ) + { + // Don't have the memory for this. Try flushing all managed resources + // out of vid mem and try again. + // FIXME: need to record this + pD3D->EvictManagedResources(); + pD3D->CreateVertexBuffer( m_nBufferSize, desc.Usage, desc.FVF, desc.Pool, &m_pVB, NULL ); + } + +#ifdef _DEBUG + if ( hr != D3D_OK ) + { + switch ( hr ) + { + case D3DERR_INVALIDCALL: + Assert( !"D3DERR_INVALIDCALL" ); + break; + case D3DERR_OUTOFVIDEOMEMORY: + Assert( !"D3DERR_OUTOFVIDEOMEMORY" ); + break; + case E_OUTOFMEMORY: + Assert( !"E_OUTOFMEMORY" ); + break; + default: + Assert( 0 ); + break; + } + } +#endif + + Assert( m_pVB ); +#else + // _X360 + if ( m_bDynamic ) + { + m_iAllocationSize = m_nBufferSize * X360_VERTEX_BUFFER_SIZE_MULTIPLIER; + Assert( m_iAllocationSize >= m_nBufferSize ); + m_pAllocatedMemory = (unsigned char*)XPhysicalAlloc( m_iAllocationSize, MAXULONG_PTR, 0, PAGE_READWRITE | MEM_LARGE_PAGES | PAGE_WRITECOMBINE ); + } + else if ( MeshMgr()->AllocatePooledVB( this, m_nBufferSize, pTextureBudgetName ) ) + { + // Successfully allocated in a shared ShaderAPI memory pool (SetBufferAllocationHandle will have been called to set the pointer and stream offset) + m_iAllocationSize = m_nBufferSize; + Assert( m_pAllocatedMemory ); + } + else + { + // Fall back to allocating a standalone VB + // NOTE: write-combining (PAGE_WRITECOMBINE) is deliberately not used, since it slows down CPU access to the data (decals+defragmentation) + m_iAllocationSize = m_nBufferSize; + m_pAllocatedMemory = (unsigned char*)XPhysicalAlloc( m_iAllocationSize, MAXULONG_PTR, 0, PAGE_READWRITE ); + } + + if ( m_pAllocatedMemory && !IsPooled() ) + { + MemAlloc_RegisterExternalAllocation( XMem_CVertexBuffer, m_pAllocatedMemory, XPhysicalSize( m_pAllocatedMemory ) ); + if ( !m_bDynamic ) + { + // Track non-pooled physallocs, to help tune CGPUBufferAllocator usage + g_SizeIndividualVBPhysAllocs += XPhysicalSize( m_pAllocatedMemory ); + g_NumIndividualVBPhysAllocs++; + } + } + + m_iNextBlockingPosition = m_iAllocationSize; +#endif + +#ifdef MEASURE_DRIVER_ALLOCATIONS + int nMemUsed = 1024; + VPROF_INCREMENT_GROUP_COUNTER( "vb count", COUNTER_GROUP_NO_RESET, 1 ); + VPROF_INCREMENT_GROUP_COUNTER( "vb driver mem", COUNTER_GROUP_NO_RESET, nMemUsed ); + VPROF_INCREMENT_GROUP_COUNTER( "total driver mem", COUNTER_GROUP_NO_RESET, nMemUsed ); +#endif + + // Track VB allocations +#if !defined( _X360 ) + g_VBAllocTracker->CountVB( m_pVB, m_bDynamic, m_nBufferSize, m_VertexSize, m_VertexBufferFormat ); +#else + g_VBAllocTracker->CountVB( this, m_bDynamic, m_iAllocationSize, m_VertexSize, m_VertexBufferFormat ); +#endif +} + + +#ifdef _X360 +void *AllocateTempBuffer( size_t nSizeInBytes ); + +//----------------------------------------------------------------------------- +// This variant is for when we already have the data in physical memory +//----------------------------------------------------------------------------- +inline CVertexBuffer::CVertexBuffer( ) : + m_pVB( 0 ), + m_Position( 0 ), + m_VertexSize( 0 ), + m_VertexCount( 0 ), + m_bFlush( false ), + m_bLocked( false ), + m_bExternalMemory( true ), + m_nBufferSize( 0 ), + m_bDynamic( false ) +#ifdef VPROF_ENABLED + ,m_Frame( -1 ) +#endif +{ + m_iAllocationSize = 0; + m_pAllocatedMemory = 0; + m_iNextBlockingPosition = 0; +} + +#include "tier0/memdbgoff.h" + +inline void CVertexBuffer::Init( IDirect3DDevice9 *pD3D, VertexFormat_t fmt, DWORD theFVF, uint8 *pVertexData, int vertexSize, int vertexCount ) +{ + m_nBufferSize = vertexSize * vertexCount; + m_Position = m_Position; + m_VertexSize = vertexSize; + m_VertexCount = vertexCount; + m_iAllocationSize = m_nBufferSize; + m_pAllocatedMemory = pVertexData; + m_iNextBlockingPosition = m_iAllocationSize; + + m_pVB = new( AllocateTempBuffer( sizeof( IDirect3DVertexBuffer9 ) ) ) IDirect3DVertexBuffer9; + XGSetVertexBufferHeader( m_nBufferSize, 0, 0, 0, m_pVB ); + XGOffsetResourceAddress( m_pVB, pVertexData ); +} + +#include "tier0/memdbgon.h" + +#endif // _X360 + +inline CVertexBuffer::~CVertexBuffer() +{ + // Track VB allocations (even if pooled) +#if !defined( _X360 ) + if ( m_pVB != NULL ) + { + g_VBAllocTracker->UnCountVB( m_pVB ); + } +#else + if ( m_pVB && m_pVB->IsSet( Dx9Device() ) ) + { + Unbind( m_pVB ); + } + + if ( !m_bExternalMemory ) + { + g_VBAllocTracker->UnCountVB( this ); + } +#endif + + if ( !m_bExternalMemory ) + { +#ifdef MEASURE_DRIVER_ALLOCATIONS + int nMemUsed = 1024; + VPROF_INCREMENT_GROUP_COUNTER( "vb count", COUNTER_GROUP_NO_RESET, -1 ); + VPROF_INCREMENT_GROUP_COUNTER( "vb driver mem", COUNTER_GROUP_NO_RESET, -nMemUsed ); + VPROF_INCREMENT_GROUP_COUNTER( "total driver mem", COUNTER_GROUP_NO_RESET, -nMemUsed ); +#endif + +#ifdef VPROF_ENABLED + if ( IsX360() || !m_bDynamic ) + { + Assert( m_pGlobalCounter ); + *m_pGlobalCounter -= m_nBufferSize; + } +#endif + +#ifdef _DEBUG + --s_BufferCount; +#endif + } + + Unlock( 0 ); + + if ( m_pSysmemBuffer ) + { + MemAlloc_FreeAligned( m_pSysmemBuffer ); + m_pSysmemBuffer = NULL; + } + +#if !defined( _X360 ) + if ( m_pVB ) + { + RECORD_COMMAND( DX8_DESTROY_VERTEX_BUFFER, 1 ); + RECORD_INT( m_UID ); + + m_pVB->Release(); + } +#else + if ( m_pAllocatedMemory && !m_bExternalMemory ) + { + if ( IsPooled() ) + { + MeshMgr()->DeallocatePooledVB( this ); + } + else + { + MemAlloc_RegisterExternalDeallocation( XMem_CVertexBuffer, m_pAllocatedMemory, XPhysicalSize( m_pAllocatedMemory ) ); + if ( !m_bDynamic ) + { + // Track non-pooled physallocs, to help tune CGPUBufferAllocator usage + g_SizeIndividualVBPhysAllocs -= XPhysicalSize( m_pAllocatedMemory ); + g_NumIndividualVBPhysAllocs--; + } + XPhysicalFree( m_pAllocatedMemory ); + } + } + + m_pAllocatedMemory = NULL; + m_pVB = NULL; +#endif // _X360 +} +#ifdef _X360 +//----------------------------------------------------------------------------- +// Get memory allocation data +//----------------------------------------------------------------------------- +inline const GPUBufferHandle_t *CVertexBuffer::GetBufferAllocationHandle( void ) +{ + Assert( IsPooled() ); + return ( IsPooled() ? &m_GPUBufferHandle : NULL ); +} + +//----------------------------------------------------------------------------- +// Update memory allocation data +//----------------------------------------------------------------------------- +inline void CVertexBuffer::SetBufferAllocationHandle( const GPUBufferHandle_t &bufferAllocationHandle ) +{ + // This VB's memory has been reallocated or freed, update our cached pointer and the D3D header + // NOTE: this should never be called while any rendering is in flight! + Assert( ( m_pAllocatedMemory == NULL ) || IsPooled() ); + if ( ( m_pAllocatedMemory == NULL ) || IsPooled() ) + { + m_GPUBufferHandle = bufferAllocationHandle; + m_pAllocatedMemory = m_GPUBufferHandle.pMemory; + if ( m_pVB ) + { + XGSetVertexBufferHeader( m_nBufferSize, 0, D3DPOOL_DEFAULT, 0, m_pVB ); + XGOffsetResourceAddress( m_pVB, m_pAllocatedMemory ); + } + } +} + +//----------------------------------------------------------------------------- +// Expose the data pointer for read-only CPU access to the data +//----------------------------------------------------------------------------- +inline const byte **CVertexBuffer::GetBufferDataPointerAddress( void ) +{ + if ( m_bDynamic /* FIXME: || m_bExternalMemory*/ ) + return NULL; + return (const byte **)&m_pAllocatedMemory; +} +#endif // _X360 + +//----------------------------------------------------------------------------- +// Compute the next offset for the next lock +//----------------------------------------------------------------------------- +inline int CVertexBuffer::NextLockOffset( ) const +{ +#if !defined( _X360 ) + int nNextOffset = ( m_Position + m_VertexSize - 1 ) / m_VertexSize; + nNextOffset *= m_VertexSize; + return nNextOffset; +#else + return m_Position; //position is already aligned properly on unlocks for 360. +#endif +} + + +//----------------------------------------------------------------------------- +// Do we have enough room without discarding? +//----------------------------------------------------------------------------- +inline bool CVertexBuffer::HasEnoughRoom( int numVertices ) const +{ +#if defined( _X360 ) + return numVertices <= m_VertexCount; //the ring buffer will free room as needed +#else + return (NextLockOffset() + (numVertices * m_VertexSize)) <= m_nBufferSize; +#endif +} + +//----------------------------------------------------------------------------- +// Block until this part of the index buffer is free +//----------------------------------------------------------------------------- +inline void CVertexBuffer::BlockUntilUnused( int nBufferSize ) +{ + Assert( nBufferSize <= m_nBufferSize ); + +#ifdef _X360 + int nLockOffset = NextLockOffset(); + Assert( (m_AllocationRing.Count() != 0) || ((m_Position == 0) && (m_iNextBlockingPosition == m_iAllocationSize)) ); + + if ( (m_iNextBlockingPosition - nLockOffset) >= nBufferSize ) + return; + + Assert( (m_AllocationRing[m_AllocationRing.Head()].m_iStartOffset == 0) || ((m_iNextBlockingPosition == m_AllocationRing[m_AllocationRing.Head()].m_iStartOffset) && (m_Position <= m_iNextBlockingPosition)) ); + + int iMinBlockPosition = nLockOffset + nBufferSize; + if( iMinBlockPosition > m_iAllocationSize ) + { + //Allocation requires us to wrap + iMinBlockPosition = nBufferSize; + m_Position = 0; + + //modify the last allocation so that it uses up the whole tail end of the buffer. Makes other code simpler + Assert( m_AllocationRing.Count() != 0 ); + m_AllocationRing[m_AllocationRing.Tail()].m_iEndOffset = m_iAllocationSize; + + //treat all allocations between the current position and the tail end of the ring as freed since they will be before we unblock + while( m_AllocationRing.Count() ) + { + unsigned int head = m_AllocationRing.Head(); + if( m_AllocationRing[head].m_iStartOffset == 0 ) + break; + + m_AllocationRing.Remove( head ); + } + } + + //now we go through the allocations until we find the last fence we care about. Treat everything up until that fence as freed. + DWORD FinalFence = 0; + unsigned int iFinalAllocationZPassIdx = 0; + while( m_AllocationRing.Count() ) + { + unsigned int head = m_AllocationRing.Head(); + + if( m_AllocationRing[head].m_iEndOffset >= iMinBlockPosition ) + { + //When this frees, we'll finally have enough space for the allocation + FinalFence = m_AllocationRing[head].m_Fence; + iFinalAllocationZPassIdx = m_AllocationRing[head].m_iZPassIdx; + m_iNextBlockingPosition = m_AllocationRing[head].m_iEndOffset; + m_AllocationRing.Remove( head ); + break; + } + m_AllocationRing.Remove( head ); + } + Assert( FinalFence != 0 ); + + if( Dx9Device()->IsFencePending( FinalFence ) ) + { +#ifdef SPEW_VERTEX_BUFFER_STALLS + float st = Plat_FloatTime(); +#endif + + if ( ( Dx9Device()->GetDeviceState() & D3DDEVICESTATE_ZPASS_BRACKET ) && + ( iFinalAllocationZPassIdx == ShaderAPI()->Get360ZPassCounter() ) ) + { + // We're about to overrun our VB ringbuffer in a single Z prepass. To avoid rendering corruption, close out the + // Z prepass and continue. This will reduce early-Z rejection efficiency and could cause a momentary framerate drop, + // but it's better than rendering corruption. + Warning( "Dynamic VB ring buffer overrun in Z Prepass. Tell Thorsten.\n" ); + + ShaderAPI()->End360ZPass(); + } + + Dx9Device()->BlockOnFence( FinalFence ); + +#ifdef SPEW_VERTEX_BUFFER_STALLS + float dt = Plat_FloatTime() - st; + Warning( "Blocked locking dynamic vertex buffer for %f ms!\n", 1000.0 * dt ); +#endif + } + +#endif +} + + +//----------------------------------------------------------------------------- +// lock, unlock +//----------------------------------------------------------------------------- +inline unsigned char* CVertexBuffer::Lock( int numVerts, int& baseVertexIndex ) +{ +#if defined( _X360 ) + if ( m_pVB && m_pVB->IsSet( Dx9Device() ) ) + { + Unbind( m_pVB ); + } +#endif + + m_nLockCount = numVerts; + + unsigned char* pLockedData = 0; + baseVertexIndex = 0; + int nBufferSize = numVerts * m_VertexSize; + + Assert( IsPC() || ( IsX360() && !m_bLocked ) ); + + // Ensure there is enough space in the VB for this data + if ( numVerts > m_VertexCount ) + { + Assert( 0 ); + return 0; + } + + if ( !IsX360() && !m_pVB && !m_pSysmemBuffer ) + return 0; + + DWORD dwFlags; + if ( m_bDynamic ) + { + dwFlags = LOCKFLAGS_APPEND; + +#if !defined( _X360 ) + // If either the user forced us to flush, + // or there is not enough space for the vertex data, + // then flush the buffer contents + if ( !m_Position || m_bFlush || !HasEnoughRoom(numVerts) ) + { + if ( m_pSysmemBuffer || !g_pShaderUtil->IsRenderThreadSafe() ) + m_bLateCreateShouldDiscard = true; + m_bFlush = false; + m_Position = 0; + + dwFlags = LOCKFLAGS_FLUSH; + } +#else + if( m_bFlush ) + { +# if ( defined( X360_BLOCK_ON_VB_FLUSH ) ) + { + if( m_AllocationRing.Count() ) + { + DWORD FinalFence = m_AllocationRing[m_AllocationRing.Tail()].m_Fence; + + m_AllocationRing.RemoveAll(); + m_Position = 0; + m_iNextBlockingPosition = m_iAllocationSize; + +# if ( defined( SPEW_VERTEX_BUFFER_STALLS ) ) + if( Dx9Device()->IsFencePending( FinalFence ) ) + { + float st = Plat_FloatTime(); +# endif + Dx9Device()->BlockOnFence( FinalFence ); +# if ( defined ( SPEW_VERTEX_BUFFER_STALLS ) ) + float dt = Plat_FloatTime() - st; + Warning( "Blocked FLUSHING dynamic vertex buffer for %f ms!\n", 1000.0 * dt ); + } +# endif + } + } +# endif + m_bFlush = false; + } +#endif + } + else + { + // Since we are a static VB, always lock the beginning of the buffer. + dwFlags = D3DLOCK_NOSYSLOCK; + m_Position = 0; + } + + if ( IsX360() && m_bDynamic ) + { + // Block until we have enough room in the buffer, this affects the result of NextLockOffset() in wrap conditions. + BlockUntilUnused( nBufferSize ); + m_pVB = NULL; + } + + int nLockOffset = NextLockOffset( ); + RECORD_COMMAND( DX8_LOCK_VERTEX_BUFFER, 4 ); + RECORD_INT( m_UID ); + RECORD_INT( nLockOffset ); + RECORD_INT( nBufferSize ); + RECORD_INT( dwFlags ); + +#if !defined( _X360 ) + // If the caller isn't in the thread that owns the render lock, need to return a system memory pointer--cannot talk to GL from + // the non-current thread. + if ( !m_pSysmemBuffer && !g_pShaderUtil->IsRenderThreadSafe() ) + { + m_pSysmemBuffer = ( byte * )MemAlloc_AllocAligned( m_nBufferSize, 16 ); + m_nSysmemBufferStartBytes = nLockOffset; + Assert( ( m_nSysmemBufferStartBytes % m_VertexSize ) == 0 ); + } + + if ( m_pSysmemBuffer != NULL ) + { + // Ensure that we're never moving backwards in a buffer--this code would need to be rewritten if so. + // We theorize this can happen if you hit the end of a buffer and then wrap before drawing--but + // this would probably break in other places as well. + Assert( nLockOffset >= m_nSysmemBufferStartBytes ); + pLockedData = m_pSysmemBuffer + nLockOffset; + } + else + { + m_pVB->Lock( nLockOffset, + nBufferSize, + reinterpret_cast< void** >( &pLockedData ), + dwFlags ); + } +#else + pLockedData = m_pAllocatedMemory + nLockOffset; +#endif + + Assert( pLockedData != 0 ); + m_bLocked = true; + if ( !IsX360() ) + { + baseVertexIndex = nLockOffset / m_VertexSize; + } + else + { + baseVertexIndex = 0; + } + return pLockedData; +} + +inline unsigned char* CVertexBuffer::Modify( bool bReadOnly, int firstVertex, int numVerts ) +{ + unsigned char* pLockedData = 0; + + // D3D still returns a pointer when you call lock with 0 verts, so just in + // case it's actually doing something, don't even try to lock the buffer with 0 verts. + if ( numVerts == 0 ) + return NULL; + + m_nLockCount = numVerts; + + // If this hits, m_pSysmemBuffer logic needs to be added to this code. + Assert( g_pShaderUtil->IsRenderThreadSafe() ); + Assert( !m_pSysmemBuffer ); // if this hits, then we need to add code to handle it + + Assert( m_pVB && !m_bDynamic ); + + if ( firstVertex + numVerts > m_VertexCount ) + { + Assert( 0 ); + return NULL; + } + + DWORD dwFlags = D3DLOCK_NOSYSLOCK; + if ( bReadOnly ) + { + dwFlags |= D3DLOCK_READONLY; + } + + RECORD_COMMAND( DX8_LOCK_VERTEX_BUFFER, 4 ); + RECORD_INT( m_UID ); + RECORD_INT( firstVertex * m_VertexSize ); + RECORD_INT( numVerts * m_VertexSize ); + RECORD_INT( dwFlags ); + + // mmw: for forcing all dynamic... LOCKFLAGS_FLUSH ); +#if !defined( _X360 ) + m_pVB->Lock( + firstVertex * m_VertexSize, + numVerts * m_VertexSize, + reinterpret_cast< void** >( &pLockedData ), + dwFlags ); +#else + if ( m_pVB->IsSet( Dx9Device() ) ) + { + Unbind( m_pVB ); + } + pLockedData = m_pAllocatedMemory + (firstVertex * m_VertexSize); +#endif + + m_Position = firstVertex * m_VertexSize; + Assert( pLockedData != 0 ); + m_bLocked = true; + + return pLockedData; +} + +inline void CVertexBuffer::Unlock( int numVerts ) +{ + if ( !m_bLocked ) + return; + + if ( !IsX360() && !m_pVB && !m_pSysmemBuffer ) + return; + + int nLockOffset = NextLockOffset(); + int nBufferSize = numVerts * m_VertexSize; + + RECORD_COMMAND( DX8_UNLOCK_VERTEX_BUFFER, 1 ); + RECORD_INT( m_UID ); + +#if !defined( _X360 ) + if ( m_pSysmemBuffer != NULL ) + { + } + else + { + #if DX_TO_GL_ABSTRACTION + Assert( numVerts <= (int)m_nLockCount ); + int unlockBytes = ( m_bDynamic ? nBufferSize : ( m_nLockCount * m_VertexSize ) ); + #else + int unlockBytes = 0; + #endif + + ReallyUnlock( unlockBytes ); + } + m_Position = nLockOffset + nBufferSize; +#else + if ( m_bDynamic ) + { + if ( numVerts > 0 ) + { + DynamicBufferAllocation_t LockData; + LockData.m_Fence = Dx9Device()->GetCurrentFence(); //This isn't the correct fence, but it's all we have access to for now and it'll provide marginal safety if something goes really wrong. + LockData.m_iStartOffset = nLockOffset; + LockData.m_iEndOffset = LockData.m_iStartOffset + nBufferSize; + LockData.m_iZPassIdx = ( Dx9Device()->GetDeviceState() & D3DDEVICESTATE_ZPASS_BRACKET ) ? ShaderAPI()->Get360ZPassCounter() : 0; + + // Round dynamic locks to 4k boundaries for GPU cache reasons + LockData.m_iEndOffset = ALIGN_VALUE( LockData.m_iEndOffset, 4096 ); + if( LockData.m_iEndOffset > m_iAllocationSize ) + LockData.m_iEndOffset = m_iAllocationSize; + + m_AllocationRing.AddToTail( LockData ); + m_Position = LockData.m_iEndOffset; + + void* pLockedData = m_pAllocatedMemory + LockData.m_iStartOffset; + + //Always re-use the same vertex buffer header based on the assumption that D3D copies it off in the draw calls. + m_pVB = &m_D3DVertexBuffer; + XGSetVertexBufferHeader( nBufferSize, 0, D3DPOOL_DEFAULT, 0, m_pVB ); + XGOffsetResourceAddress( m_pVB, pLockedData ); + + // Invalidate the GPU caches for this memory. + Dx9Device()->InvalidateGpuCache( pLockedData, nBufferSize, 0 ); + } + } + else + { + if ( !m_pVB ) + { + m_pVB = &m_D3DVertexBuffer; + XGSetVertexBufferHeader( m_nBufferSize, 0, D3DPOOL_DEFAULT, 0, m_pVB ); + XGOffsetResourceAddress( m_pVB, m_pAllocatedMemory ); + } + m_Position = nLockOffset + nBufferSize; + + // Invalidate the GPU caches for this memory. + Dx9Device()->InvalidateGpuCache( m_pAllocatedMemory, m_nBufferSize, 0 ); + } +#endif + + m_bLocked = false; +} + + +inline void CVertexBuffer::HandleLateCreation( ) +{ + if ( !m_pSysmemBuffer ) + { + return; + } + + if( !m_pVB ) + { + bool bPrior = g_VBAllocTracker->TrackMeshAllocations( "HandleLateCreation" ); + Create( Dx9Device() ); + if ( !bPrior ) + { + g_VBAllocTracker->TrackMeshAllocations( NULL ); + } + } + + void* pWritePtr = NULL; + const int dataToWriteBytes = m_bDynamic ? ( m_Position - m_nSysmemBufferStartBytes ) : ( m_nLockCount * m_VertexSize ); + DWORD dwFlags = D3DLOCK_NOSYSLOCK; + if ( m_bDynamic ) + { + dwFlags |= ( m_bLateCreateShouldDiscard ? D3DLOCK_DISCARD : D3DLOCK_NOOVERWRITE ); + } + + // Always clear this. + m_bLateCreateShouldDiscard = false; + + // Don't use the Lock function, it does a bunch of stuff we don't want. + HRESULT hr = m_pVB->Lock( m_nSysmemBufferStartBytes, + dataToWriteBytes, + &pWritePtr, + dwFlags); + + // If this fails we're about to crash. Consider skipping the update and leaving + // m_pSysmemBuffer around to try again later. (For example in case of device loss) + Assert( SUCCEEDED( hr ) ); hr; + memcpy( pWritePtr, m_pSysmemBuffer + m_nSysmemBufferStartBytes, dataToWriteBytes ); + ReallyUnlock( dataToWriteBytes ); + + MemAlloc_FreeAligned( m_pSysmemBuffer ); + m_pSysmemBuffer = NULL; +} + + +// Returns the allocated size +inline int CVertexBuffer::AllocationSize() const +{ +#ifdef _X360 + return m_iAllocationSize; +#else + return m_VertexCount * m_VertexSize; +#endif +} + + +#endif // DYNAMICVB_H + |