1 files changed, 714 insertions, 0 deletions
diff --git a/tier1/keyvaluesjson.cpp b/tier1/keyvaluesjson.cpp
new file mode 100644
index 0000000..7d3dabf
--- /dev/null
+++ b/tier1/keyvaluesjson.cpp
@@ -0,0 +1,714 @@
+//========= Copyright Valve Corporation, All rights reserved. =================//
+//
+// Read JSON-formatted data into KeyValues
+//
+//=============================================================================//
+
+#include "tier1/keyvaluesjson.h"
+#include "tier1/utlbuffer.h"
+#include "tier1/strtools.h"
+#include <stdint.h> // INT32_MIN defn
+
+// Construct from a CUtlBuffer: the input is the TellPut() bytes starting at Base().
+// Init() also lexes the first token.
+KeyValuesJSONParser::KeyValuesJSONParser( const CUtlBuffer &buf )
+{ Init( (const char *)buf.Base(), buf.TellPut() ); }
+
+// Construct from raw text of cbSize bytes.
+// A negative cbSize means the length is taken from V_strlen( pszText ) instead.
+KeyValuesJSONParser::KeyValuesJSONParser( const char *pszText, int cbSize )
+{ Init( pszText, ( cbSize < 0 ) ? V_strlen(pszText) : cbSize ); }
+
+KeyValuesJSONParser::~KeyValuesJSONParser() = default; // the destructor body has nothing to do
+
+// Point the parser at the cbSize bytes starting at pszText, reset the error and line state, and lex the first token.
+void KeyValuesJSONParser::Init( const char *pszText, int cbSize )
+{
+	m_cur = pszText;
+	m_end = m_cur + cbSize;
+	m_nLine = 1;
+	m_szErrMsg[0] = '\0';
+	m_eToken = kToken_Null; // NextToken() returns early when m_eToken < 0, so seed it before the first call
+	NextToken();
+}
+
+// Parse the whole input as one JSON object.  Returns a new KeyValues tree (release it with
+// deleteThis()), or NULL on failure, in which case m_szErrMsg holds the reason.
+KeyValues *KeyValuesJSONParser::ParseFile()
+{
+	// A valid JSON document should contain a single object, surrounded by curly braces.
+	if ( m_eToken == kToken_EOF )
+	{
+		V_sprintf_safe( m_szErrMsg, "Input contains no data" );
+		return NULL;
+	}
+	if ( m_eToken == kToken_Err )
+		return NULL;
+	if ( m_eToken == '{' )
+	{
+		// Parse the entire file as one big object
+		KeyValues *pResult = new KeyValues("");
+		const bool bParsed = ParseObject( pResult );
+		if ( bParsed && m_eToken == kToken_EOF )
+			return pResult;
+		pResult->deleteThis();
+		// A failed parse, or a lexer error after the closing '}', has already set a specific message.  Keep it.
+		if ( !bParsed || m_eToken == kToken_Err )
+			return NULL;
+	}
+	V_sprintf_safe( m_szErrMsg, "%s not expected here.  A valid JSON document should be a single object, which begins with '{' and ends with '}'", GetTokenDebugText() );
+	return NULL;
+}
+
+// Parse a JSON object into pObject.  The current token must be the opening '{'.
+// Every "key": value member is added to pObject as a subkey named after the key.
+// Returns true with the closing '}' consumed, or false with m_szErrMsg set.
+bool KeyValuesJSONParser::ParseObject( KeyValues *pObject )
+{
+	Assert( m_eToken == '{' );
+	const int nBraceLine = m_nLine;
+	KeyValues *pTail = NULL;
+	NextToken();
+	for (;;)
+	{
+		// The closing brace ends the member list
+		if ( m_eToken == '}' )
+			break;
+		// The lexer has already recorded what went wrong
+		if ( m_eToken == kToken_Err )
+			return false;
+
+		if ( m_eToken == kToken_EOF )
+		{
+			// Blame the line of the unmatched '{'.  The line number of the
+			// end of the input is never useful to anybody.
+			m_nLine = nBraceLine;
+			V_strcpy_safe( m_szErrMsg, "End of input was reached and '{' was not matched by '}'" );
+			return false;
+		}
+
+		// A member starts with its key name, which has to be a string
+		if ( m_eToken != kToken_String )
+		{
+			V_sprintf_safe( m_szErrMsg, "%s not expected here; expected string for key name or '}'", GetTokenDebugText() );
+			return false;
+		}
+		KeyValues *pMember = new KeyValues( m_vecTokenChars.Base() );
+		NextToken();
+
+		// Next comes the ':' and then the value.  Until the member has been handed
+		// to pObject we own it, so free it on either failure.
+		bool bMemberOK = ( m_eToken == ':' );
+		if ( !bMemberOK )
+		{
+			V_sprintf_safe( m_szErrMsg, "%s not expected here.  Missing ':'?", GetTokenDebugText() );
+		}
+		else
+		{
+			NextToken();
+			bMemberOK = ParseValue( pMember );
+		}
+		if ( !bMemberOK )
+		{
+			pMember->deleteThis();
+			return false;
+		}
+
+		// Hand the member over, passing the previously added one as the known last child
+		pObject->AddSubkeyUsingKnownLastChild( pMember, pTail );
+		pTail = pMember;
+
+		// A ',' separates members, and is tolerated even right before the '}'.
+		if ( m_eToken == ',' )
+			NextToken();
+		else if ( m_eToken != '}' )
+		{
+			V_sprintf_safe( m_szErrMsg, "%s not expected here.  Missing ',' or '}'?", GetTokenDebugText() );
+			return false;
+		}
+	}
+
+	// Step past the closing '}'
+	NextToken();
+	return true;
+}
+
+// Parse a JSON array into pArray.  The current token must be the opening '['.
+// Each element is added to pArray as a subkey named after its zero-based index ("0", "1", ...).
+// A trailing ',' right before the ']' is accepted.
+// Returns true with the closing ']' consumed, or false with m_szErrMsg set.
+bool KeyValuesJSONParser::ParseArray( KeyValues *pArray )
+{
+	Assert( m_eToken == '[' );
+	const int nBracketLine = m_nLine;
+	KeyValues *pTail = NULL;
+	NextToken();
+
+	// nIndex counts the elements parsed so far
+	for ( int nIndex = 0; m_eToken != ']'; ++nIndex )
+	{
+		// The lexer has already recorded what went wrong
+		if ( m_eToken == kToken_Err )
+			return false;
+
+		if ( m_eToken == kToken_EOF )
+		{
+			// Blame the line of the unmatched '['.  The line number of the
+			// end of the input is never useful to anybody.
+			m_nLine = nBracketLine;
+			V_strcpy_safe( m_szErrMsg, "End of input was reached and '[' was not matched by ']'" );
+			return false;
+		}
+
+		// The element's key name is simply its position in the array
+		char szIndexName[ 32 ];
+		V_sprintf_safe( szIndexName, "%d", nIndex );
+		KeyValues *pElement = new KeyValues( szIndexName );
+
+		// We own the element until it has been handed to pArray
+		if ( !ParseValue( pElement ) )
+		{
+			pElement->deleteThis();
+			return false;
+		}
+
+		// Hand the element over, passing the previously added one as the known last child
+		pArray->AddSubkeyUsingKnownLastChild( pElement, pTail );
+		pTail = pElement;
+
+		// Look at what follows the element
+		if ( m_eToken == ',' )
+		{
+			// Separator.  It is tolerated even right before the ']'.
+			NextToken();
+		}
+		else if ( m_eToken == ':' )
+		{
+			// Most likely somebody is trying to put object-like data inside of an array
+			V_sprintf_safe( m_szErrMsg, "':' not expected inside an array.  ('[]' used when '{}' was intended?)" );
+			return false;
+		}
+		else if ( m_eToken != ']' )
+		{
+			V_sprintf_safe( m_szErrMsg, "%s not expected here.  Missing ',' or ']'?", GetTokenDebugText() );
+			return false;
+		}
+	}
+
+	// Step past the closing ']'
+	NextToken();
+
+	// Success
+	return true;
+}
+
+// Parse one JSON value, starting at the current token, and store it in pValue.
+// Objects and arrays are delegated to ParseObject / ParseArray; scalar tokens are stored
+// directly and then consumed.
+// Returns false with m_szErrMsg set if the token cannot start a value or cannot be stored.
+bool KeyValuesJSONParser::ParseValue( KeyValues *pValue )
+{
+	// Containers consume their own tokens
+	if ( m_eToken == '{' )
+		return ParseObject( pValue );
+	if ( m_eToken == '[' )
+		return ParseArray( pValue );
+
+	// Lexer failure: the message is already in place
+	if ( m_eToken == kToken_Err )
+		return false;
+
+	// Scalars: store the value, then fall out of the chain to consume the token
+	if ( m_eToken == kToken_String )
+	{
+		pValue->SetString( NULL, m_vecTokenChars.Base() );
+	}
+	else if ( m_eToken == kToken_NumberInt )
+	{
+		const char *pszDigits = m_vecTokenChars.Base();
+		if ( *pszDigits != '-' )
+		{
+			// Non-negative: stored as an int up to 0x7fffffff, as a uint64 above that
+			const uint64 uValue = V_atoui64( pszDigits );
+			if ( uValue <= 0x7fffffffU )
+				pValue->SetInt( NULL, (int)uValue );
+			else
+				pValue->SetUint64( NULL, uValue );
+		}
+		else
+		{
+			// Negative: anything below INT32_MIN is rejected
+			const int64 nValue = V_atoi64( pszDigits );
+			if ( nValue < INT32_MIN )
+			{
+				// !KLUDGE! KeyValues cannot support this!
+				V_sprintf_safe( m_szErrMsg, "%s is out of range for KeyValues, which doesn't support signed 64-bit numbers", pszDigits );
+				return false;
+			}
+			pValue->SetInt( NULL, (int)nValue );
+		}
+	}
+	else if ( m_eToken == kToken_NumberFloat )
+	{
+		const float flValue = V_atof( m_vecTokenChars.Base() );
+		pValue->SetFloat( NULL, flValue );
+	}
+	else if ( m_eToken == kToken_True )
+	{
+		pValue->SetBool( NULL, true );
+	}
+	else if ( m_eToken == kToken_False )
+	{
+		pValue->SetBool( NULL, false );
+	}
+	else if ( m_eToken == kToken_Null )
+	{
+		// JSON null is stored as a NULL pointer value
+		pValue->SetPtr( NULL, NULL );
+	}
+	else
+	{
+		V_sprintf_safe( m_szErrMsg, "%s not expected here; missing value?", GetTokenDebugText() );
+		return false;
+	}
+
+	// Every scalar is a single token; step past it
+	NextToken();
+	return true;
+}
+
+// Advance to the next token.  Skips spaces, tabs, newlines (counting lines in m_nLine) and
+// "//" comments, then sets m_eToken; string and number tokens also fill m_vecTokenChars.
+// Once a terminal state (m_eToken < 0) has been reached, further calls do nothing.
+void KeyValuesJSONParser::NextToken()
+{
+	// Already in terminal state?
+	if ( m_eToken < 0 )
+		return;
+
+	// Clear token
+	m_vecTokenChars.SetCount(0);
+
+	// Scan until we hit the end of input
+	while ( m_cur < m_end )
+	{
+		// Next character?
+		char c = *m_cur;
+		switch (c)
+		{
+			// Whitespace?  Eat it and keep parsing
+			case ' ':
+			case '\t':
+				++m_cur;
+				break;
+
+			// Newline?  Eat it and advance line number
+			case '\n':
+			case '\r':
+				++m_nLine;
+				++m_cur;
+
+				// Eat \r\n or \n\r pair as a single character
+				if ( m_cur < m_end && *m_cur == ( '\n' + '\r' - c ) )
+					++m_cur;
+				break;
+
+			// Single-character JSON token?
+			case ':':
+			case '{':
+			case '}':
+			case '[':
+			case ']':
+			case ',':
+				m_eToken = c;
+				++m_cur;
+				return;
+
+			// String?
+			case '\"':
+			case '\'': // NOTE: We allow strings to be delimited by single quotes, which is not JSON compliant
+				ParseStringToken();
+				return;
+
+			case '-':
+			case '.':
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				ParseNumberToken();
+				return;
+
+			// Literal "true"
+			case 't':
+				if ( m_cur + 4 <= m_end && m_cur[1] == 'r' && m_cur[2] == 'u' && m_cur[3] == 'e' )
+				{
+					m_cur += 4;
+					m_eToken = kToken_True;
+					return;
+				}
+				goto unexpected_char;
+
+			// Literal "false"
+			case 'f':
+				if ( m_cur + 5 <= m_end && m_cur[1] == 'a' && m_cur[2] == 'l' && m_cur[3] == 's' && m_cur[4] == 'e' )
+				{
+					m_cur += 5;
+					m_eToken = kToken_False;
+					return;
+				}
+				goto unexpected_char;
+
+			// Literal "null"
+			case 'n':
+				if ( m_cur + 4 <= m_end && m_cur[1] == 'u' && m_cur[2] == 'l' && m_cur[3] == 'l' )
+				{
+					m_cur += 4;
+					m_eToken = kToken_Null;
+					return;
+				}
+				goto unexpected_char;
+
+			case '/':
+				// C++-style comment?  The '/' may be the very last byte of the input, so make
+				// sure the second character is in range before looking at it.
+				if ( m_cur + 1 < m_end && m_cur[1] == '/' )
+				{
+					m_cur += 2;
+					while ( m_cur < m_end && *m_cur != '\n' && *m_cur != '\r' )
+						++m_cur;
+					// Leave newline as the next character, we'll handle it above
+					break;
+				}
+				// fall through: a lone '/' is an unexpected character
+
+			default:
+			unexpected_char:
+				if ( V_isprint(c) )
+					V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x ('%c')", (uint8)c, c );
+				else
+					V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x", (uint8)c );
+				m_eToken = kToken_Err;
+				return;
+		}
+	}
+
+	m_eToken = kToken_EOF;
+}
+
+// Lex a number starting at m_cur, which NextToken() has found to be '-', '.' or a digit.
+// On success m_eToken is kToken_NumberInt or kToken_NumberFloat and m_vecTokenChars holds
+// the NUL-terminated text of the number.  On failure m_eToken is kToken_Err and
+// m_szErrMsg describes the problem.
+void KeyValuesJSONParser::ParseNumberToken()
+{
+	// Clear token
+	m_vecTokenChars.SetCount(0);
+
+	// Eat leading minus sign
+	if ( *m_cur == '-' )
+	{
+		m_vecTokenChars.AddToTail( '-' );
+		++m_cur;
+	}
+
+	if ( m_cur >= m_end )
+	{
+		V_strcpy_safe( m_szErrMsg, "Unexpected EOF while parsing number" );
+		m_eToken = kToken_Err;
+		return;
+	}
+
+	// Whole part.  Every character is appended to the token exactly once, at the point
+	// where it is consumed, so the token text always mirrors the input.
+	char c = *m_cur;
+	bool bHasWholePart = false;
+	switch ( c )
+	{
+		case '0':
+			// Leading 0 cannot be followed by any more digits, as per JSON spec (and to make sure nobody tries to parse octal).
+			m_vecTokenChars.AddToTail( *(m_cur++) );
+			bHasWholePart = true;
+			break;
+
+		case '1': case '2': case '3':
+		case '4': case '5': case '6':
+		case '7': case '8': case '9':
+			bHasWholePart = true;
+			// Accumulate digits until we hit a non-digit
+			while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
+				m_vecTokenChars.AddToTail( *(m_cur++) );
+			break;
+
+		case '.':
+			// strict JSON doesn't allow a number that starts with a decimal point, but we do.
+			// Leave the '.' alone here: the fraction code below consumes and appends it.
+			break;
+
+		default:
+			// Only reachable after a '-'.  A sign with no number behind it is not a number.
+			goto invalid_char;
+	}
+
+	// Assume this is integral, unless we hit a decimal point and/or exponent
+	m_eToken = kToken_NumberInt;
+
+	// Fractional portion?
+	if ( m_cur < m_end && *m_cur == '.' )
+	{
+		m_eToken = kToken_NumberFloat;
+		// Eat decimal point
+		m_vecTokenChars.AddToTail( *(m_cur++) );
+
+		// Accumulate digits until we hit a non-digit
+		bool bHasFractionPart = false;
+		while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
+		{
+			m_vecTokenChars.AddToTail( *(m_cur++) );
+			bHasFractionPart = true;
+		}
+
+		// Make sure we aren't just a single '.'
+		if ( !bHasWholePart && !bHasFractionPart )
+		{
+			m_vecTokenChars.AddToTail(0);
+			V_sprintf_safe( m_szErrMsg, "Invalid number starting with '%s'", m_vecTokenChars.Base() );
+			m_eToken = kToken_Err;
+			return;
+		}
+	}
+
+	// Exponent?
+	if ( m_cur < m_end && ( *m_cur == 'e' || *m_cur == 'E' ) )
+	{
+		m_eToken = kToken_NumberFloat;
+		// Eat 'e'
+		m_vecTokenChars.AddToTail( *(m_cur++) );
+
+		// Optional sign
+		if ( m_cur < m_end && ( *m_cur == '-' || *m_cur == '+' ) )
+			m_vecTokenChars.AddToTail( *(m_cur++) );
+
+		// Accumulate digits until we hit a non-digit
+		bool bHasExponentDigit = false;
+		while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
+		{
+			m_vecTokenChars.AddToTail( *(m_cur++) );
+			bHasExponentDigit = true;
+		}
+		if ( !bHasExponentDigit )
+		{
+			V_strcpy_safe( m_szErrMsg, "Bad exponent in floating point number" );
+			m_eToken = kToken_Err;
+			return;
+		}
+	}
+
+	// OK, we have parsed a valid number.  Terminate token
+	m_vecTokenChars.AddToTail( '\0' );
+
+	// EOF?  That's OK for now, at this lexical parsing level.  We'll handle the error
+	// at the higher parse level, when expecting a comma or closing delimiter
+	if ( m_cur >= m_end )
+		return;
+
+	// Is the next thing a valid character?  This is the most common case.
+	c = *m_cur;
+	if ( V_isspace( c ) || c == ',' || c == '}' || c == ']' || c == '/' )
+		return;
+
+	// Handle these guys as "tokens", to provide a slightly more meaningful error message
+	if ( c == '[' || c == '{' )
+		return;
+
+	// Anything else, treat the whole thing as an invalid numerical constant
+invalid_char:
+	if ( V_isprint(c) )
+		V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x ('%c')", (uint8)c, c );
+	else
+		V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x", (uint8)c );
+	m_eToken = kToken_Err;
+}
+
+// Lex a quoted string starting at m_cur, which points at the opening quote (" or ').
+// On success m_eToken is kToken_String and m_vecTokenChars holds the NUL-terminated text
+// with escapes decoded.  On failure m_eToken is kToken_Err and m_szErrMsg describes the problem.
+void KeyValuesJSONParser::ParseStringToken()
+{
+	char cDelim = *(m_cur++);
+	while ( m_cur < m_end )
+	{
+		char c = *(m_cur++);
+		if ( c == '\r' || c == '\n' )
+		{
+			V_sprintf_safe( m_szErrMsg, "Hit end of line before closing quote (%c)", cDelim ); // name the missing quote, not the newline we hit
+			m_eToken = kToken_Err;
+			return;
+		}
+		if ( c == cDelim )
+		{
+			m_eToken = kToken_String;
+			m_vecTokenChars.AddToTail( '\0' );
+			return;
+		}
+
+		// Ordinary character?  Just append it
+		if ( c != '\\' )
+		{
+			m_vecTokenChars.AddToTail( c );
+			continue;
+		}
+
+		// Escaped character.  If the input ends right after the backslash, the loop
+		// exits and the missing closing quote is reported below.
+		if ( m_cur >= m_end )
+			continue;
+
+		// Fetch the escaped character itself, then check it against the table of allowed escapes
+		c = *(m_cur++);
+		switch (c)
+		{
+			case '\\':
+			case '/':
+			case '\'':
+			case '\"': m_vecTokenChars.AddToTail( c ); break;
+			case 'b': m_vecTokenChars.AddToTail( '\b' ); break;
+			case 'f': m_vecTokenChars.AddToTail( '\f' ); break;
+			case 'n': m_vecTokenChars.AddToTail( '\n' ); break;
+			case 'r': m_vecTokenChars.AddToTail( '\r' ); break;
+			case 't': m_vecTokenChars.AddToTail( '\t' ); break;
+			case 'u':
+			{
+				// Make sure we are followed by exactly 4 hex digits
+				if ( m_cur + 4 > m_end || !V_isxdigit( m_cur[0] ) || !V_isxdigit( m_cur[1] ) || !V_isxdigit( m_cur[2] ) || !V_isxdigit( m_cur[3] ) )
+				{
+					V_sprintf_safe( m_szErrMsg, "\\u must be followed by exactly 4 hex digits" );
+					m_eToken = kToken_Err;
+					return;
+				}
+				// Parse the codepoint
+				uchar32 nCodePoint = 0;
+				for ( int n = 0 ; n < 4 ; ++n )
+				{
+					nCodePoint <<= 4;
+					char chHex = *(m_cur++);
+					if ( chHex >= '0' && chHex <= '9' )
+						nCodePoint += chHex - '0';
+					else if ( chHex >= 'a' && chHex <= 'f' )
+						nCodePoint += chHex + 0x0a - 'a';
+					else if ( chHex >= 'A' && chHex <= 'F' )
+						nCodePoint += chHex + 0x0a - 'A';
+					else
+						Assert( false ); // inconceivable, due to above
+				}
+
+				// Encode it in UTF-8
+				char utf8Encode[8];
+				int r = Q_UChar32ToUTF8( nCodePoint, utf8Encode );
+				if ( r < 0 || r > 4 )
+				{
+					V_sprintf_safe( m_szErrMsg, "Invalid code point \\u%04x", nCodePoint );
+					m_eToken = kToken_Err;
+					return;
+				}
+				for ( int i = 0 ; i < r ; ++i )
+					m_vecTokenChars.AddToTail( utf8Encode[i] );
+			} break;
+
+			default:
+				if ( V_isprint(c) )
+					V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x ('\\%c')", (uint8)c, c );
+				else
+					V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x", (uint8)c );
+				m_eToken = kToken_Err;
+				return;
+		}
+	}
+
+	V_sprintf_safe( m_szErrMsg, "Hit end of input before closing quote (%c)", cDelim );
+	m_eToken = kToken_Err;
+}
+
+// Text for the current token, for use in error messages.  Not meant for the error token: that (or any unknown value) asserts and gives "<parse error>".
+const char *KeyValuesJSONParser::GetTokenDebugText()
+{
+	const char *pszText = NULL;
+	switch ( m_eToken )
+	{
+		case '{': pszText = "'{'"; break;
+		case '}': pszText = "'}'"; break;
+		case '[': pszText = "'['"; break;
+		case ']': pszText = "']'"; break;
+		case ':': pszText = "':'"; break;
+		case ',': pszText = "','"; break;
+		case kToken_True: pszText = "'true'"; break;
+		case kToken_False: pszText = "'false'"; break;
+		case kToken_Null: pszText = "'null'"; break;
+		case kToken_String: pszText = "<string>"; break;
+		case kToken_NumberInt: case kToken_NumberFloat: pszText = "<number>"; break;
+		case kToken_EOF: pszText = "<EOF>"; break;
+	}
+	if ( pszText != NULL )
+		return pszText;
+	Assert( false );
+	return "<parse error>";
+}
+
+#ifdef _DEBUG
+
+// Debug-only check: pszData must parse cleanly; the resulting tree is freed.
+static void JSONTest_ParseValid( const char *pszData )
+{
+	KeyValues *pFile = KeyValuesJSONParser( pszData ).ParseFile();
+	Assert( pFile );
+	if ( pFile ) pFile->deleteThis(); // Assert() may return: never call through NULL
+}
+
+static void JSONTest_ParseInvalid( const char *pszData, const char *pszExpectedErrMsgSnippet, int nExpectedFailureLine ) // debug-only helper: pszData must fail to parse
+{
+	KeyValuesJSONParser parser( pszData );
+	KeyValues *pFile = parser.ParseFile();
+	Assert( pFile == NULL ); // the input must be rejected
+	Assert( V_stristr( parser.m_szErrMsg, pszExpectedErrMsgSnippet ) != NULL ); // the error text must contain the expected snippet
+	Assert( parser.m_nLine == nExpectedFailureLine ); // the error must be attributed to this line
+}
+
+void TestKeyValuesJSONParser() // debug-only self-test; failures surface through Assert
+{
+	JSONTest_ParseValid( "{}" ); // smallest valid document
+	JSONTest_ParseValid( R"JSON({
+		"key": "string_value",
+		"pos_int32": 123,
+		"pos_int64": 123456789012,
+		"neg_int32": -456,
+		"float": -45.23,
+		"pos_exponent": 1e30,
+		"neg_exponent": 1e-16,
+		"decimal_and_exponent": 1.e+30,
+		"no_leading_zero": .7, // we support this, even though strict JSON says it's no good
+		"zero": 0,
+		"true_value": true,
+		"false_value": false,
+		"null_value": null,
+		"with_escaped": "\r \t \n",
+		"unicode": "\u1234 \\u12f3",
+		"array_of_ints": [ 1, 2, 3, -45 ],
+		"empty_array": [],
+		"array_with_stuff_inside": [
+			{}, // this is a comment.
+			[ 0.45, {}, "hello!" ],
+			{ "id": 0 },
+			// Trailing comma above.  Comment here
+		],
+	})JSON" ); // strings, numbers, literals, escapes, nesting, comments and trailing commas
+	JSONTest_ParseInvalid( "{ \"key\": 123", "missing", 1 ); // object is never closed
+	JSONTest_ParseInvalid( "{ \"key\": 123.4f }", "number", 1 ); // stray character glued to a number
+}
+
+#endif