diff options
Diffstat (limited to 'public/tier1/utlsymbollarge.h')
| -rw-r--r-- | public/tier1/utlsymbollarge.h | 499 |
1 files changed, 499 insertions, 0 deletions
diff --git a/public/tier1/utlsymbollarge.h b/public/tier1/utlsymbollarge.h new file mode 100644 index 0000000..4384540 --- /dev/null +++ b/public/tier1/utlsymbollarge.h @@ -0,0 +1,499 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: Defines a large symbol table (intp sized handles, can store more than 64k strings) +// +// $Header: $ +// $NoKeywords: $ +//===========================================================================// + +#ifndef UTLSYMBOLLARGE_H +#define UTLSYMBOLLARGE_H + +#ifdef _WIN32 +#pragma once +#endif + +#include "tier0/threadtools.h" +#include "tier1/utltshash.h" +#include "tier1/stringpool.h" +#include "tier0/vprof.h" +#include "tier1/utltshash.h" + +//----------------------------------------------------------------------------- +// CUtlSymbolTableLarge: +// description: +// This class defines a symbol table, which allows us to perform mappings +// of strings to symbols and back. +// +// This class stores the strings in a series of string pools. The returned CUtlSymbolLarge is just a pointer +// to the string data, the hash precedes it in memory and is used to speed up searching, etc. +//----------------------------------------------------------------------------- + +typedef intp UtlSymLargeId_t; + +#define UTL_INVAL_SYMBOL_LARGE ((UtlSymLargeId_t)~0) + +class CUtlSymbolLarge +{ +public: + // constructor, destructor + CUtlSymbolLarge() + { + u.m_Id = UTL_INVAL_SYMBOL_LARGE; + } + + CUtlSymbolLarge( UtlSymLargeId_t id ) + { + u.m_Id = id; + } + CUtlSymbolLarge( CUtlSymbolLarge const& sym ) + { + u.m_Id = sym.u.m_Id; + } + + // operator= + CUtlSymbolLarge& operator=( CUtlSymbolLarge const& src ) + { + u.m_Id = src.u.m_Id; + return *this; + } + + // operator== + bool operator==( CUtlSymbolLarge const& src ) const + { + return u.m_Id == src.u.m_Id; + } + + // operator== + bool operator==( UtlSymLargeId_t const& src ) const + { + return u.m_Id == src; + } + + // operator== + bool operator!=( CUtlSymbolLarge const& src ) const + { + return u.m_Id != src.u.m_Id; + } + + // operator== + bool operator!=( UtlSymLargeId_t const& src ) const + { + return u.m_Id != src; + } + + // Gets at the symbol + operator UtlSymLargeId_t const() const + { + return u.m_Id; + } + + // Gets the string associated with the symbol + inline const char* String( ) const + { + if ( u.m_Id == UTL_INVAL_SYMBOL_LARGE ) + return ""; + return u.m_pAsString; + } + + inline bool IsValid() const + { + return u.m_Id != UTL_INVAL_SYMBOL_LARGE ? true : false; + } + +private: + // Disallowed + CUtlSymbolLarge( const char* pStr ); // they need to go through the table to assign the ptr + bool operator==( const char* pStr ) const; // disallow since we don't know if the table this is from was case sensitive or not... maybe we don't care + + union + { + UtlSymLargeId_t m_Id; + char const *m_pAsString; + } u; +}; + +#define MIN_STRING_POOL_SIZE 2048 + +inline uint32 CUtlSymbolLarge_Hash( bool CASEINSENSITIVE, const char *pString, int len ) +{ + return ( CASEINSENSITIVE ? HashStringCaseless( pString ) : HashString( pString ) ); +} + +typedef uint32 LargeSymbolTableHashDecoration_t; + +// The structure consists of the hash immediately followed by the string data +struct CUtlSymbolTableLargeBaseTreeEntry_t +{ + LargeSymbolTableHashDecoration_t m_Hash; + // Variable length string data + char m_String[1]; + + bool IsEmpty() const + { + return ( ( m_Hash == 0 ) && ( 0 == m_String[0] ) ); + } + + char const *String() const + { + return (const char *)&m_String[ 0 ]; + } + + CUtlSymbolLarge ToSymbol() const + { + return reinterpret_cast< UtlSymLargeId_t >( String() ); + } + + LargeSymbolTableHashDecoration_t HashValue() const + { + return m_Hash; + } +}; + +template< class TreeType, bool CASEINSENSITIVE > +class CTreeEntryLess +{ +public: + CTreeEntryLess( int ignored = 0 ) {} // permits default initialization to NULL in CUtlRBTree + bool operator!() const { return false; } + bool operator()( CUtlSymbolTableLargeBaseTreeEntry_t * const &left, CUtlSymbolTableLargeBaseTreeEntry_t * const &right ) const + { + // compare the hashes + if ( left->m_Hash == right->m_Hash ) + { + // if the hashes match compare the strings + if ( !CASEINSENSITIVE ) + return strcmp( left->String(), right->String() ) < 0; + else + return V_stricmp( left->String(), right->String() ) < 0; + } + else + { + return left->m_Hash < right->m_Hash; + } + } +}; + +// For non-threaded versions, simply index into CUtlRBTree +template< bool CASEINSENSITIVE > +class CNonThreadsafeTree : public CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp, CTreeEntryLess< CNonThreadsafeTree< CASEINSENSITIVE >, CASEINSENSITIVE > > +{ +public: + typedef CUtlRBTree<CUtlSymbolTableLargeBaseTreeEntry_t *, intp, CTreeEntryLess< CNonThreadsafeTree, CASEINSENSITIVE > > CNonThreadsafeTreeType; + + CNonThreadsafeTree() : + CNonThreadsafeTreeType( 0, 16 ) + { + } + inline void Commit() + { + // Nothing, only matters for thread-safe tables + } + inline int Insert( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) + { + return CNonThreadsafeTreeType::Insert( entry ); + } + inline int Find( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) const + { + return CNonThreadsafeTreeType::Find( entry ); + } + inline int InvalidIndex() const + { + return CNonThreadsafeTreeType::InvalidIndex(); + } + inline int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const + { + CUtlVector< CUtlSymbolTableLargeBaseTreeEntry_t * > list; + list.EnsureCount( nCount ); + for ( int i = 0; i < nCount; ++i ) + { + pElements[ i ] = CNonThreadsafeTreeType::Element( i )->ToSymbol(); + } + + return nCount; + } +}; + +// Since CUtlSymbolTableLargeBaseTreeEntry_t already has the hash +// contained inside of it, don't need to recompute a hash here +template < int BUCKET_COUNT, class KEYTYPE, bool CASEINSENSITIVE > +class CCThreadsafeTreeHashMethod +{ +public: + static int Hash( const KEYTYPE &key, int nBucketMask ) + { + uint32 nHash = key->HashValue(); + return ( nHash & nBucketMask ); + } + + static bool Compare( CUtlSymbolTableLargeBaseTreeEntry_t * const &lhs, CUtlSymbolTableLargeBaseTreeEntry_t * const &rhs ) + { + if ( lhs->m_Hash != rhs->m_Hash ) + return false; + if ( !CASEINSENSITIVE ) + { + return ( !Q_strcmp( lhs->String(), rhs->String() ) ? true : false ); + } + + return ( !Q_stricmp( lhs->String(), rhs->String() ) ? true : false ); + } +}; + +/* + NOTE: So the only crappy thing about using a CUtlTSHash here is that the KEYTYPE is a CUtlSymbolTableLargeBaseTreeEntry_t ptr which has both the + hash and the string since with strings there is a good chance of hash collision after you have a fair number of strings so we have to implement + a Compare method (above) which falls back to strcmp/stricmp if the hashes are equal. This means that all of the data is in the KEYTYPE of the hash and the + payload doesn't matter. So I made the payload also be a pointer to a CUtlSymbolTableLargeBaseTreeEntry_t since that makes using the API more convenient + + TODO: If we have a CUtlTSHash that was all about the existence of the KEYTYPE and didn't require a payload (or template on 'void') then we could eliminate + 50% of the pointer overhead used for this data structure. +*/ + +// Thread safe version is based on the +template < bool CASEINSENSITIVE > +class CThreadsafeTree : public CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod< 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE > > +{ +public: + typedef CUtlTSHash< CUtlSymbolTableLargeBaseTreeEntry_t *, 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CCThreadsafeTreeHashMethod< 2048, CUtlSymbolTableLargeBaseTreeEntry_t *, CASEINSENSITIVE > > CThreadsafeTreeType; + + CThreadsafeTree() : + CThreadsafeTreeType( 32 ) + { + } + inline void Commit() + { + CThreadsafeTreeType::Commit(); + } + inline int Insert( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) + { + return CThreadsafeTreeType::Insert( entry, entry ); + } + inline int Find( CUtlSymbolTableLargeBaseTreeEntry_t *entry ) + { + return CThreadsafeTreeType::Find( entry ); + } + inline int InvalidIndex() const + { + return CThreadsafeTreeType::InvalidHandle(); + } + inline int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const + { + CUtlVector< UtlTSHashHandle_t > list; + list.EnsureCount( nCount ); + int c = CThreadsafeTreeType::GetElements( nFirstElement, nCount, list.Base() ); + for ( int i = 0; i < c; ++i ) + { + pElements[ i ] = CThreadsafeTreeType::Element( list[ i ] )->ToSymbol(); + } + + return c; + } +}; + +// Base Class for threaded and non-threaded types +template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE = MIN_STRING_POOL_SIZE > +class CUtlSymbolTableLargeBase +{ +public: + // constructor, destructor + CUtlSymbolTableLargeBase(); + ~CUtlSymbolTableLargeBase(); + + // Finds and/or creates a symbol based on the string + CUtlSymbolLarge AddString( const char* pString ); + + // Finds the symbol for pString + CUtlSymbolLarge Find( const char* pString ) const; + + // Remove all symbols in the table. + void RemoveAll(); + + int GetNumStrings( void ) const + { + return m_Lookup.Count(); + } + + void Commit() + { + m_Lookup.Commit(); + } + + // Returns elements in the table + int GetElements( int nFirstElement, int nCount, CUtlSymbolLarge *pElements ) const + { + return m_Lookup.GetElements( nFirstElement, nCount, pElements ); + } + + uint64 GetMemoryUsage() const + { + uint64 unBytesUsed = 0u; + + for ( int i=0; i < m_StringPools.Count(); i++ ) + { + StringPool_t *pPool = m_StringPools[i]; + + unBytesUsed += (uint64)pPool->m_TotalLen; + } + return unBytesUsed; + } + + +protected: + + struct StringPool_t + { + int m_TotalLen; // How large is + int m_SpaceUsed; + char m_Data[1]; + }; + + TreeType m_Lookup; + + // stores the string data + CUtlVector< StringPool_t * > m_StringPools; + +private: + int FindPoolWithSpace( int len ) const; +}; + +//----------------------------------------------------------------------------- +// constructor, destructor +//----------------------------------------------------------------------------- +template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > +inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE >::CUtlSymbolTableLargeBase() : + m_StringPools( 8 ) +{ +} + +template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > +inline CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::~CUtlSymbolTableLargeBase() +{ + // Release the stringpool string data + RemoveAll(); +} + +template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > +inline CUtlSymbolLarge CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::Find( const char* pString ) const +{ + VPROF( "CUtlSymbolLarge::Find" ); + if (!pString) + return CUtlSymbolLarge(); + + // Passing this special invalid symbol makes the comparison function + // use the string passed in the context + int len = Q_strlen( pString ) + 1; + + CUtlSymbolTableLargeBaseTreeEntry_t *search = (CUtlSymbolTableLargeBaseTreeEntry_t *)_alloca( len + sizeof( LargeSymbolTableHashDecoration_t ) ); + search->m_Hash = CUtlSymbolLarge_Hash( CASEINSENSITIVE, pString, len ); + Q_memcpy( (char *)&search->m_String[ 0 ], pString, len ); + + int idx = const_cast< TreeType & >(m_Lookup).Find( search ); + + if ( idx == m_Lookup.InvalidIndex() ) + return UTL_INVAL_SYMBOL_LARGE; + + const CUtlSymbolTableLargeBaseTreeEntry_t *entry = m_Lookup[ idx ]; + return entry->ToSymbol(); +} + +template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > +inline int CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::FindPoolWithSpace( int len ) const +{ + for ( int i=0; i < m_StringPools.Count(); i++ ) + { + StringPool_t *pPool = m_StringPools[i]; + + if ( (pPool->m_TotalLen - pPool->m_SpaceUsed) >= len ) + { + return i; + } + } + + return -1; +} + +//----------------------------------------------------------------------------- +// Finds and/or creates a symbol based on the string +//----------------------------------------------------------------------------- +template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > +inline CUtlSymbolLarge CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::AddString( const char* pString ) +{ + VPROF("CUtlSymbolLarge::AddString"); + if (!pString) + return UTL_INVAL_SYMBOL_LARGE; + + CUtlSymbolLarge id = Find( pString ); + if ( id != UTL_INVAL_SYMBOL_LARGE ) + return id; + + int lenString = Q_strlen(pString) + 1; // length of just the string + int lenDecorated = lenString + sizeof(LargeSymbolTableHashDecoration_t); // and with its hash decoration + // make sure that all strings are aligned on 2-byte boundaries so the hashes will read correctly + // This assert seems to be invalid because LargeSymbolTableHashDecoration_t is always + // a uint32, by design. + //COMPILE_TIME_ASSERT(sizeof(LargeSymbolTableHashDecoration_t) == sizeof(intp)); + lenDecorated = ALIGN_VALUE(lenDecorated, sizeof( intp ) ); + + // Find a pool with space for this string, or allocate a new one. + int iPool = FindPoolWithSpace( lenDecorated ); + if ( iPool == -1 ) + { + // Add a new pool. + int newPoolSize = MAX( lenDecorated + sizeof( StringPool_t ), POOL_SIZE ); + StringPool_t *pPool = (StringPool_t*)malloc( newPoolSize ); + + pPool->m_TotalLen = newPoolSize - sizeof( StringPool_t ); + pPool->m_SpaceUsed = 0; + iPool = m_StringPools.AddToTail( pPool ); + } + + // Compute a hash + LargeSymbolTableHashDecoration_t hash = CUtlSymbolLarge_Hash( CASEINSENSITIVE, pString, lenString ); + + // Copy the string in. + StringPool_t *pPool = m_StringPools[iPool]; + // Assert( pPool->m_SpaceUsed < 0xFFFF ); // Pool could be bigger than 2k + // This should never happen, because if we had a string > 64k, it + // would have been given its entire own pool. + + CUtlSymbolTableLargeBaseTreeEntry_t *entry = ( CUtlSymbolTableLargeBaseTreeEntry_t * )&pPool->m_Data[ pPool->m_SpaceUsed ]; + + pPool->m_SpaceUsed += lenDecorated; + + entry->m_Hash = hash; + char *pText = (char *)&entry->m_String [ 0 ]; + Q_memcpy( pText, pString, lenString ); + + // insert the string into the database + MEM_ALLOC_CREDIT(); + int idx = m_Lookup.Insert( entry ); + return m_Lookup.Element( idx )->ToSymbol(); +} + +//----------------------------------------------------------------------------- +// Remove all symbols in the table. +//----------------------------------------------------------------------------- +template < class TreeType, bool CASEINSENSITIVE, size_t POOL_SIZE > +inline void CUtlSymbolTableLargeBase<TreeType, CASEINSENSITIVE, POOL_SIZE>::RemoveAll() +{ + m_Lookup.Purge(); + + for ( int i=0; i < m_StringPools.Count(); i++ ) + { + StringPool_t * pString = m_StringPools[i]; + free( pString ); + } + + m_StringPools.RemoveAll(); +} + +// Case-sensitive +typedef CUtlSymbolTableLargeBase< CNonThreadsafeTree< false >, false > CUtlSymbolTableLarge; +// Case-insensitive +typedef CUtlSymbolTableLargeBase< CNonThreadsafeTree< true >, true > CUtlSymbolTableLarge_CI; +// Multi-threaded case-sensitive +typedef CUtlSymbolTableLargeBase< CThreadsafeTree< false >, false > CUtlSymbolTableLargeMT; +// Multi-threaded case-insensitive +typedef CUtlSymbolTableLargeBase< CThreadsafeTree< true >, true > CUtlSymbolTableLargeMT_CI; + +#endif // UTLSYMBOLLARGE_H |