aboutsummaryrefslogtreecommitdiff
path: root/mp/src/public/tier1/strtools.h
diff options
context:
space:
mode:
Diffstat (limited to 'mp/src/public/tier1/strtools.h')
-rw-r--r--mp/src/public/tier1/strtools.h236
1 files changed, 228 insertions, 8 deletions
diff --git a/mp/src/public/tier1/strtools.h b/mp/src/public/tier1/strtools.h
index 035789f9..12e0a0c4 100644
--- a/mp/src/public/tier1/strtools.h
+++ b/mp/src/public/tier1/strtools.h
@@ -151,6 +151,26 @@ inline bool StringHasPrefix ( const char *str, const char *prefix )
inline bool StringHasPrefixCaseSensitive( const char *str, const char *prefix ) { return StringAfterPrefixCaseSensitive( str, prefix ) != NULL; }
+// Shared worker for V_strEndsWith / V_striEndsWith: reports whether the tail of 'pStr'
+// equals 'pSuffix'. CASE_SENSITIVE selects V_strcmp (true) or V_stricmp (false).
+template< bool CASE_SENSITIVE > inline bool _V_strEndsWithInner( const char *pStr, const char *pSuffix )
+{
+	const int cchSuffix = V_strlen( pSuffix );
+	const int cchString = V_strlen( pStr );
+
+	// All strings end with the empty string (matches Java & .NET behaviour)
+	if ( cchSuffix == 0 )
+		return true;
+
+	// A suffix longer than the whole string can never match.
+	if ( cchSuffix > cchString )
+		return false;
+
+	// Compare only the last cchSuffix characters of the string against the suffix.
+	const char *pTail = pStr + ( cchString - cchSuffix );
+	if ( !CASE_SENSITIVE )
+		return V_stricmp( pTail, pSuffix ) == 0;
+
+	return V_strcmp( pTail, pSuffix ) == 0;
+}
+
+// Does 'pStr' end with 'pSuffix'? (case sensitive/insensitive variants)
+// Both forward to _V_strEndsWithInner; the template argument is a C++ bool, so the
+// language literals are used rather than the TRUE/FALSE platform macros.
+inline bool V_strEndsWith( const char *pStr, const char *pSuffix ) { return _V_strEndsWithInner<true>( pStr, pSuffix ); }
+inline bool V_striEndsWith( const char *pStr, const char *pSuffix ) { return _V_strEndsWithInner<false>( pStr, pSuffix ); }
+
+
// Normalizes a float string in place.
// (removes leading zeros, trailing zeros after the decimal point, and the decimal point itself where possible)
void V_normalizeFloatString( char* pFloat );
@@ -220,6 +240,15 @@ template <size_t maxLenInChars> void V_strcpy_safe( OUT_Z_ARRAY char (&pDest)[ma
V_strncpy( pDest, pSrc, (int)maxLenInChars );
}
+// Duplicates a string using new[] to allocate the copy. The caller owns the returned
+// buffer and must release it with delete[] (not free()). A NULL source yields NULL.
+inline char *V_strdup( const char *pSrc )
+{
+	if ( pSrc == NULL )
+		return NULL;
+
+	// Size includes the terminator so it is copied along with the characters.
+	const int cchWithNull = V_strlen( pSrc ) + 1;
+	char *pResult = new char [ cchWithNull ];
+	V_memcpy( pResult, pSrc, cchWithNull );
+	return pResult;
+}
+
void V_wcsncpy( OUT_Z_BYTECAP(maxLenInBytes) wchar_t *pDest, wchar_t const *pSrc, int maxLenInBytes );
template <size_t maxLenInChars> void V_wcscpy_safe( OUT_Z_ARRAY wchar_t (&pDest)[maxLenInChars], wchar_t const *pSrc )
{
@@ -245,6 +274,164 @@ template <size_t cchDest> char *V_strlwr_safe( INOUT_Z_ARRAY char (&pBuf)[cchDes
return _V_strnlwr( pBuf, (int)cchDest );
}
+// Unicode string conversion policies - what to do if an illegal sequence is encountered
+enum EStringConvertErrorPolicy
+{
+	// Building-block flag bits; the public policy values below are composed from these.
+	_STRINGCONVERTFLAG_SKIP = 1,
+	_STRINGCONVERTFLAG_FAIL = 2,
+	_STRINGCONVERTFLAG_ASSERT = 4,
+
+	// Base policies. REPLACE is the value with neither the SKIP nor the FAIL bit set.
+	STRINGCONVERT_REPLACE = 0,
+	STRINGCONVERT_SKIP = _STRINGCONVERTFLAG_SKIP,
+	STRINGCONVERT_FAIL = _STRINGCONVERTFLAG_FAIL,
+
+	// The same three policies with the ASSERT bit set as well.
+	STRINGCONVERT_ASSERT_REPLACE = _STRINGCONVERTFLAG_ASSERT | STRINGCONVERT_REPLACE,
+	STRINGCONVERT_ASSERT_SKIP = _STRINGCONVERTFLAG_ASSERT | STRINGCONVERT_SKIP,
+	STRINGCONVERT_ASSERT_FAIL = _STRINGCONVERTFLAG_ASSERT | STRINGCONVERT_FAIL,
+};
+
+// Unicode (UTF-8, UTF-16, UTF-32) fundamental conversion functions.
+bool Q_IsValidUChar32( uchar32 uValue );
+int Q_UChar32ToUTF8Len( uchar32 uValue );
+int Q_UChar32ToUTF8( uchar32 uValue, char *pOut );
+int Q_UChar32ToUTF16Len( uchar32 uValue );
+int Q_UChar32ToUTF16( uchar32 uValue, uchar16 *pOut );
+
+// Validate that a Unicode string is well-formed and contains only valid code points
+bool Q_UnicodeValidate( const char *pUTF8 );
+bool Q_UnicodeValidate( const uchar16 *pUTF16 );
+bool Q_UnicodeValidate( const uchar32 *pUTF32 );
+
+// Returns length of string in Unicode code points (printed glyphs or non-printing characters)
+int Q_UnicodeLength( const char *pUTF8 );
+int Q_UnicodeLength( const uchar16 *pUTF16 );
+int Q_UnicodeLength( const uchar32 *pUTF32 );
+
+// Returns length of string in elements, not characters! These are analogous to Q_strlen and Q_wcslen
+// Both count elements up to, but not including, the first zero element.
+inline int Q_strlen16( const uchar16 *puc16 )
+{
+	int cElements = 0;
+	for ( ; puc16[cElements] != 0; ++cElements )
+	{
+	}
+	return cElements;
+}
+inline int Q_strlen32( const uchar32 *puc32 )
+{
+	int cElements = 0;
+	for ( ; puc32[cElements] != 0; ++cElements )
+	{
+	}
+	return cElements;
+}
+
+
+// Repair invalid Unicode strings by dropping truncated characters and fixing improperly-double-encoded UTF-16 sequences.
+// Unlike conversion functions which replace with '?' by default, a repair operation assumes that you know that something
+// is wrong with the string (eg, mid-sequence truncation) and you just want to do the best possible job of cleaning it up.
+// You can pass a REPLACE or FAIL policy if you would prefer to replace characters with '?' or clear the entire string.
+// Returns nonzero on success, or 0 if the policy is FAIL and an invalid sequence was found.
+int Q_UnicodeRepair( char *pUTF8, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_SKIP );
+int Q_UnicodeRepair( uchar16 *pUTF16, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_SKIP );
+int Q_UnicodeRepair( uchar32 *pUTF32, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_SKIP );
+
+// Advance pointer forward by N Unicode code points (printed glyphs or non-printing characters), stopping at terminating null if encountered.
+char *Q_UnicodeAdvance( char *pUTF8, int nCharacters );
+uchar16 *Q_UnicodeAdvance( uchar16 *pUTF16, int nCharacters );
+uchar32 *Q_UnicodeAdvance( uchar32 *pUTF32, int nCharacters );
+// Const overloads: the const_cast only selects the mutable overload above; the result
+// is converted back to a pointer-to-const on return.
+inline const char *Q_UnicodeAdvance( const char *pUTF8, int nCharacters ) { return Q_UnicodeAdvance( const_cast<char*>( pUTF8 ), nCharacters ); }
+inline const uchar16 *Q_UnicodeAdvance( const uchar16 *pUTF16, int nCharacters ) { return Q_UnicodeAdvance( const_cast<uchar16*>( pUTF16 ), nCharacters ); }
+inline const uchar32 *Q_UnicodeAdvance( const uchar32 *pUTF32, int nCharacters ) { return Q_UnicodeAdvance( const_cast<uchar32*>( pUTF32 ), nCharacters ); }
+
+// Truncate to maximum of N Unicode code points (printed glyphs or non-printing characters)
+// Each overload writes a zero terminator at the position Q_UnicodeAdvance returns.
+inline void Q_UnicodeTruncate( char *pUTF8, int nCharacters )
+{
+	char *pCut = Q_UnicodeAdvance( pUTF8, nCharacters );
+	*pCut = 0;
+}
+inline void Q_UnicodeTruncate( uchar16 *pUTF16, int nCharacters )
+{
+	uchar16 *pCut = Q_UnicodeAdvance( pUTF16, nCharacters );
+	*pCut = 0;
+}
+inline void Q_UnicodeTruncate( uchar32 *pUTF32, int nCharacters )
+{
+	uchar32 *pCut = Q_UnicodeAdvance( pUTF32, nCharacters );
+	*pCut = 0;
+}
+
+
+// Conversion between Unicode string types (UTF-8, UTF-16, UTF-32). Deals with bytes, not element counts,
+// to minimize harm from the programmer mistakes which continue to plague our wide-character string code.
+// Returns the number of bytes written to the output, or if output is NULL, the number of bytes required.
+int Q_UTF8ToUTF16( const char *pUTF8, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar16 *pUTF16, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF8ToUTF32( const char *pUTF8, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF16ToUTF8( const uchar16 *pUTF16, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF16ToUTF32( const uchar16 *pUTF16, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF32ToUTF8( const uchar32 *pUTF32, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF32ToUTF16( const uchar32 *pUTF32, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar16 *pUTF16, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+
+// This is disgusting and exists only to easily facilitate having 16-bit and 32-bit wchar_t's on different platforms
+int Q_UTF32ToUTF32( const uchar32 *pUTF32Source, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32Dest, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+
+// Conversion between count-limited UTF-n character arrays, including any potential NULL characters.
+// Output has a terminating NULL for safety; strip the last character if you want an unterminated string.
+// Returns the number of bytes written to the output, or if output is NULL, the number of bytes required.
+int Q_UTF8CharsToUTF16( const char *pUTF8, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar16 *pUTF16, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF8CharsToUTF32( const char *pUTF8, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF16CharsToUTF8( const uchar16 *pUTF16, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF16CharsToUTF32( const uchar16 *pUTF16, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar32 *pUTF32, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF32CharsToUTF8( const uchar32 *pUTF32, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+int Q_UTF32CharsToUTF16( const uchar32 *pUTF32, int nElements, OUT_Z_BYTECAP(cubDestSizeInBytes) uchar16 *pUTF16, int cubDestSizeInBytes, EStringConvertErrorPolicy ePolicy = STRINGCONVERT_ASSERT_REPLACE );
+
+// Decode a single UTF-8 character to a uchar32, returns number of UTF-8 bytes parsed
+int Q_UTF8ToUChar32( const char *pUTF8_, uchar32 &uValueOut, bool &bErrorOut );
+
+// Decode a single UTF-16 character to a uchar32, returns number of UTF-16 characters (NOT BYTES) consumed
+int Q_UTF16ToUChar32( const uchar16 *pUTF16, uchar32 &uValueOut, bool &bErrorOut );
+
+
+// NOTE: WString means either UTF32 or UTF16 depending on the platform and compiler settings.
+#if defined( _MSC_VER ) || defined( _WIN32 )
+#define Q_UTF8ToWString Q_UTF8ToUTF16
+#define Q_UTF8CharsToWString Q_UTF8CharsToUTF16
+#define Q_UTF32ToWString Q_UTF32ToUTF16
+#define Q_WStringToUTF8 Q_UTF16ToUTF8
+#define Q_WStringCharsToUTF8 Q_UTF16CharsToUTF8
+#define Q_WStringToUTF32 Q_UTF16ToUTF32
+#else
+#define Q_UTF8ToWString Q_UTF8ToUTF32
+#define Q_UTF8CharsToWString Q_UTF8CharsToUTF32
+#define Q_UTF32ToWString Q_UTF32ToUTF32
+#define Q_WStringToUTF8 Q_UTF32ToUTF8
+#define Q_WStringCharsToUTF8 Q_UTF32CharsToUTF8
+#define Q_WStringToUTF32 Q_UTF32ToUTF32
+#endif
+
+// These are legacy names which don't make a lot of sense but are used everywhere. Prefer the WString convention wherever possible
+#define V_UTF8ToUnicode Q_UTF8ToWString
+#define V_UnicodeToUTF8 Q_WStringToUTF8
+
+
+#ifdef WIN32
+// This function is ill-defined as it relies on the current ANSI code page. Currently Win32 only for tools.
+int Q_LocaleSpecificANSIToUTF8( const char *pANSI, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes );
+#endif
+
+// Windows-1252 is mostly the same as ISO Latin-1, and probably what you want if you are
+// saddled with an 8-bit ANSI string that originated on a Windows system.
+int Q_Windows1252CharsToUTF8( const char *pchSrc, int cchSrc, OUT_Z_BYTECAP(cchDestUTF8) char *pchDestUTF8, int cchDestUTF8 );
+
+// CP 437 is used for VGA console text and some old-school file formats such as ZIP. It
+// is also known as the "IBM PC OEM code page" and various related names. You probably
+// don't want to use this function unless you know for a fact that you're dealing with
+// old-school OEM code pages. Otherwise try the Windows-1252 function above.
+int Q_CP437CharsToUTF8( const char *pchSrc, int cchSrc, OUT_Z_BYTECAP(cchDestUTF8) char *pchDestUTF8, int cchDestUTF8 );
+
+// replaces characters in a UTF8 string with their identical-looking equivalent (non-roundtrippable)
+//
+// older version of API uses a small homoglyph table; newer version uses a larger one
+//
+// strings using old version are baked into the database, so we won't toss it quite yet,
+// but don't use it for new features.
+int Q_NormalizeUTF8Old( const char *pchSrc, OUT_Z_CAP(cchDest) char *pchDest, int cchDest );
+int Q_NormalizeUTF8( const char *pchSrc, OUT_Z_CAP(cchDest) char *pchDest, int cchDest );
+
+//-----------------------------------------------------------------------------
+// Purpose: replaces characters in a UTF8 string with similar-looking equivalents.
+//			Only replaces with ASCII characters; any byte still outside 0..127 after
+//			Q_NormalizeUTF8 has run is replaced with '?'.
+//			This operation is destructive (i.e. you can't roundtrip through the normalized
+//			form).
+// Input:	pchDest - fixed-size destination array; its element count is deduced as maxLenInChars
+//			pchSrc - source string handed to Q_NormalizeUTF8
+// Output:	the value returned by Q_NormalizeUTF8, unchanged
+//-----------------------------------------------------------------------------
+template <size_t maxLenInChars> int Q_NormalizeUTF8ToASCII( OUT_Z_ARRAY char (&pchDest)[maxLenInChars], const char *pchSrc )
+{
+	int nResult = Q_NormalizeUTF8( pchSrc, pchDest, static_cast<int>( maxLenInChars ) );
+
+	// Never touch bytes beyond the destination array, whatever count the normalizer reports.
+	int nScan = nResult;
+	if ( nScan > static_cast<int>( maxLenInChars ) )
+		nScan = static_cast<int>( maxLenInChars );
+
+	// replace non ASCII characters with ?
+	for ( int i = 0; i < nScan; i++ )
+	{
+		// Compare as unsigned so the test does not depend on whether plain char is signed.
+		if ( static_cast<unsigned char>( pchDest[i] ) > 127 )
+		{
+			pchDest[i] = '?';
+		}
+	}
+
+	return nResult;
+}
// UNDONE: Find a non-compiler-specific way to do this
#ifdef _WIN32
@@ -321,13 +508,29 @@ char *V_pretifymem( float value, int digitsafterdecimal = 2, bool usebinaryonek
// Prints out a pretified integer with comma separators (eg, 7,233,270,000)
char *V_pretifynum( int64 value );
-// conversion functions wchar_t <-> char, returning the number of characters converted
-int V_UTF8ToUnicode( const char *pUTF8, OUT_Z_BYTECAP(cubDestSizeInBytes) wchar_t *pwchDest, int cubDestSizeInBytes );
-int V_UnicodeToUTF8( const wchar_t *pUnicode, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes );
-int V_UCS2ToUnicode( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) wchar_t *pUnicode, int cubDestSizeInBytes );
-int V_UCS2ToUTF8( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes );
-int V_UnicodeToUCS2( const wchar_t *pUnicode, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUCS2, int cubDestSizeInBytes );
-int V_UTF8ToUCS2( const char *pUTF8, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) ucs2 *pUCS2, int cubDestSizeInBytes );
+// Worker taking the destination size as int; declared here, defined elsewhere.
+int _V_UCS2ToUnicode( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) wchar_t *pUnicode, int cubDestSizeInBytes );
+// Front end that accepts the destination size as any type T and narrows it to int
+// before forwarding (presumably so callers can pass sizeof() results directly).
+template< typename T > inline int V_UCS2ToUnicode( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) wchar_t *pUnicode, T cubDestSizeInBytes )
+{
+	const int cubDest = static_cast<int>( cubDestSizeInBytes );
+	return _V_UCS2ToUnicode( pUCS2, pUnicode, cubDest );
+}
+
+// Worker taking the destination size as int; declared here, defined elsewhere.
+int _V_UCS2ToUTF8( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, int cubDestSizeInBytes );
+// Front end that accepts the destination size as any type T and narrows it to int
+// before forwarding.
+template< typename T > inline int V_UCS2ToUTF8( const ucs2 *pUCS2, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUTF8, T cubDestSizeInBytes )
+{
+	const int cubDest = static_cast<int>( cubDestSizeInBytes );
+	return _V_UCS2ToUTF8( pUCS2, pUTF8, cubDest );
+}
+
+// Worker taking both byte counts as int; declared here, defined elsewhere.
+int _V_UnicodeToUCS2( const wchar_t *pUnicode, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUCS2, int cubDestSizeInBytes );
+// Front end that accepts the source and destination sizes as independent types T and U
+// and narrows each to int before forwarding.
+template< typename T, typename U > inline int V_UnicodeToUCS2( const wchar_t *pUnicode, T cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) char *pUCS2, U cubDestSizeInBytes )
+{
+	const int cubSrc = static_cast<int>( cubSrcInBytes );
+	const int cubDest = static_cast<int>( cubDestSizeInBytes );
+	return _V_UnicodeToUCS2( pUnicode, cubSrc, pUCS2, cubDest );
+}
+
+// Worker taking both byte counts as int; declared here, defined elsewhere.
+int _V_UTF8ToUCS2( const char *pUTF8, int cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) ucs2 *pUCS2, int cubDestSizeInBytes );
+// Front end that accepts the source and destination sizes as independent types T and U
+// and narrows each to int before forwarding.
+template< typename T, typename U > inline int V_UTF8ToUCS2( const char *pUTF8, T cubSrcInBytes, OUT_Z_BYTECAP(cubDestSizeInBytes) ucs2 *pUCS2, U cubDestSizeInBytes )
+{
+	const int cubSrc = static_cast<int>( cubSrcInBytes );
+	const int cubDest = static_cast<int>( cubDestSizeInBytes );
+	return _V_UTF8ToUCS2( pUTF8, cubSrc, pUCS2, cubDest );
+}
// strips leading and trailing whitespace; returns true if any characters were removed. UTF-8 and UTF-16 versions.
bool Q_StripPrecedingAndTrailingWhitespace( char *pch );
@@ -573,6 +776,7 @@ public:
m_pwch = NULL;
#if !defined( WIN32 ) && !defined(_WIN32)
m_pucs2 = NULL;
+ m_bCreatedUCS2 = false;
#endif
m_bCreatedUTF16 = false;
}
@@ -584,6 +788,7 @@ public:
m_pwch = pwch;
#if !defined( WIN32 ) && !defined(_WIN32)
m_pucs2 = NULL;
+ m_bCreatedUCS2 = false;
#endif
m_bCreatedUTF16 = true;
}
@@ -594,7 +799,8 @@ public:
m_pch = NULL;
m_pwch = NULL;
m_pucs2 = pwch;
- m_bCreatedUTF16 = true;
+ m_bCreatedUCS2 = true;
+ m_bCreatedUTF16 = false;
}
#endif
@@ -652,6 +858,10 @@ public:
{
delete [] m_pwch;
}
+#if !defined( WIN32 ) && !defined(_WIN32)
+ if ( !m_bCreatedUCS2 && m_pucs2 )
+ delete [] m_pucs2;
+#endif
}
private:
@@ -730,6 +940,8 @@ private:
// so we perform a second allocation that's just the size we need.
void PopulateUCS2()
{
+ if ( m_bCreatedUCS2 )
+ return;
if ( m_pch == NULL )
return; // no UTF-8 string to convert
if ( m_pucs2 != NULL )
@@ -760,6 +972,7 @@ private:
const wchar_t *m_pwch;
#if !defined( WIN32 ) && !defined(_WIN32)
const ucs2 *m_pucs2;
+ bool m_bCreatedUCS2;
#endif
// "created as UTF-16", means our owned string is the UTF-8 string not the UTF-16 one.
bool m_bCreatedUTF16;
@@ -868,4 +1081,11 @@ size_t Q_URLDecode( OUT_CAP(nDecodeDestLen) char *pchDecodeDest, int nDecodeDest
#endif // !defined( VSTDLIB_DLL_EXPORT )
+#ifdef POSIX
+#define FMT_WS L"%ls"
+#else
+#define FMT_WS L"%s"
+#endif
+
+
#endif // TIER1_STRTOOLS_H