summaryrefslogtreecommitdiff
path: root/tier1/keyvaluesjson.cpp
diff options
context:
space:
mode:
authorFluorescentCIAAfricanAmerican <[email protected]>2020-04-22 12:56:21 -0400
committerFluorescentCIAAfricanAmerican <[email protected]>2020-04-22 12:56:21 -0400
commit3bf9df6b2785fa6d951086978a3e66f49427166a (patch)
tree2c0f1f0c63c4832882bc93814ebd2c2b1c6224e5 /tier1/keyvaluesjson.cpp
downloadarchived-source-engine-2018-hl2-src-master.tar.xz
archived-source-engine-2018-hl2-src-master.zip
Diffstat (limited to 'tier1/keyvaluesjson.cpp')
-rw-r--r--tier1/keyvaluesjson.cpp714
1 files changed, 714 insertions, 0 deletions
diff --git a/tier1/keyvaluesjson.cpp b/tier1/keyvaluesjson.cpp
new file mode 100644
index 0000000..7d3dabf
--- /dev/null
+++ b/tier1/keyvaluesjson.cpp
@@ -0,0 +1,714 @@
+//========= Copyright Valve Corporation, All rights reserved. =================//
+//
+// Read JSON-formatted data into KeyValues
+//
+//=============================================================================//
+
+#include "tier1/keyvaluesjson.h"
+#include "tier1/utlbuffer.h"
+#include "tier1/strtools.h"
+#include <stdint.h> // INT32_MIN defn
+
+// Construct a parser over the contents of a CUtlBuffer: the text starts at
+// buf.Base() and its length is the buffer's current put position.
+KeyValuesJSONParser::KeyValuesJSONParser( const CUtlBuffer &buf )
+{
+    const char *pszBufText = (const char *)buf.Base();
+    const int cbBufText = buf.TellPut();
+    Init( pszBufText, cbBufText );
+}
+
+// Construct a parser over a raw character range.  A negative cbSize means
+// the length is measured with V_strlen( pszText ).
+KeyValuesJSONParser::KeyValuesJSONParser( const char *pszText, int cbSize )
+{
+    int cbText = cbSize;
+    if ( cbText < 0 )
+        cbText = V_strlen( pszText );
+    Init( pszText, cbText );
+}
+
+// The destructor body is intentionally empty.
+KeyValuesJSONParser::~KeyValuesJSONParser()
+{
+}
+
+// Reset the parser to scan [pszText, pszText+cbSize) and read the first token.
+void KeyValuesJSONParser::Init( const char *pszText, int cbSize )
+{
+    // Input window
+    m_cur = pszText;
+    m_end = pszText + cbSize;
+
+    // Start on line 1 with no error recorded
+    m_nLine = 1;
+    m_szErrMsg[0] = '\0';
+
+    // NextToken() returns immediately when the current token is negative,
+    // so seed a placeholder token before priming the lexer.
+    m_eToken = kToken_Null;
+    NextToken();
+}
+
+// Parse the whole input as a single JSON object.
+//
+// Returns a newly allocated KeyValues on success (release it with deleteThis()).
+// Returns NULL on failure, with a description of the problem in m_szErrMsg.
+KeyValues *KeyValuesJSONParser::ParseFile()
+{
+    // A valid JSON object should contain a single object, surrounded by curly braces.
+    if ( m_eToken == kToken_EOF )
+    {
+        V_sprintf_safe( m_szErrMsg, "Input contains no data" );
+        return NULL;
+    }
+
+    // The lexer has already filled in m_szErrMsg
+    if ( m_eToken == kToken_Err )
+        return NULL;
+
+    if ( m_eToken == '{' )
+    {
+        // Parse the entire file as one big object
+        KeyValues *pResult = new KeyValues("");
+        if ( !ParseObject( pResult ) )
+        {
+            pResult->deleteThis();
+            return NULL;
+        }
+        if ( m_eToken == kToken_EOF )
+            return pResult;
+
+        // Something follows the closing '}', so the document is rejected
+        pResult->deleteThis();
+
+        // If that trailing text failed to tokenize, the lexer has already
+        // written a specific message.  Keep it rather than overwriting it
+        // below (GetTokenDebugText() has no text for the error token).
+        if ( m_eToken == kToken_Err )
+            return NULL;
+    }
+    V_sprintf_safe( m_szErrMsg, "%s not expected here.  A valid JSON document should be a single object, which begins with '{' and ends with '}'", GetTokenDebugText() );
+    return NULL;
+}
+
+// Parse a JSON object into pObject.  The current token must be the opening '{'.
+//
+// Each "key": value member becomes a child KeyValues named after the key and is
+// appended to pObject in input order.  On success the closing '}' has been
+// consumed and true is returned.  On failure false is returned and m_szErrMsg
+// describes the problem; pObject may already hold the members parsed so far.
+bool KeyValuesJSONParser::ParseObject( KeyValues *pObject )
+{
+    Assert( m_eToken == '{' );
+    int nOpenDelimLine = m_nLine;
+    NextToken();
+    KeyValues *pLastChild = NULL;
+    while ( m_eToken != '}' )
+    {
+        // Parse error?  The lexer has already set the message.
+        if ( m_eToken == kToken_Err )
+            return false;
+        if ( m_eToken == kToken_EOF )
+        {
+            // Actually report the error at the line of the unmatched delimiter.
+            // There's no need to report the line number of the end of file, that is always
+            // useless.
+            m_nLine = nOpenDelimLine;
+            V_strcpy_safe( m_szErrMsg, "End of input was reached and '{' was not matched by '}'" );
+            return false;
+        }
+
+        // It must be a string, for the key name
+        if ( m_eToken != kToken_String )
+        {
+            V_sprintf_safe( m_szErrMsg, "%s not expected here; expected string for key name or '}'", GetTokenDebugText() );
+            return false;
+        }
+
+        KeyValues *pChildValue = new KeyValues( m_vecTokenChars.Base() );
+        NextToken();
+
+        // Expect and eat colon
+        if ( m_eToken != ':' )
+        {
+            // If the lexer failed on the text after the key, keep its more
+            // specific message instead of overwriting it.
+            if ( m_eToken != kToken_Err )
+                V_sprintf_safe( m_szErrMsg, "%s not expected here.  Missing ':'?", GetTokenDebugText() );
+            pChildValue->deleteThis();
+            return false;
+        }
+        NextToken();
+
+        // Recursively parse the value
+        if ( !ParseValue( pChildValue ) )
+        {
+            pChildValue->deleteThis();
+            return false;
+        }
+
+        // Add to parent.  From here on the child is reachable through pObject.
+        pObject->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild );
+        pLastChild = pChildValue;
+
+        // Eat the comma, if there is one.  If no comma,
+        // then the other thing that could come next
+        // is the closing brace to close the object
+        // NOTE: We are allowing the extra comma after the last item
+        if ( m_eToken == ',' )
+        {
+            NextToken();
+        }
+        else if ( m_eToken == kToken_Err )
+        {
+            // Lexer failed on whatever follows the value; keep its message
+            return false;
+        }
+        else if ( m_eToken != '}' )
+        {
+            V_sprintf_safe( m_szErrMsg, "%s not expected here.  Missing ',' or '}'?", GetTokenDebugText() );
+            return false;
+        }
+    }
+
+    // Eat closing '}'
+    NextToken();
+
+    // Success
+    return true;
+}
+
+// Parse a JSON array into pArray.  The current token must be the opening '['.
+//
+// Each element becomes a child KeyValues whose key name is its zero-based index
+// written in decimal ("0", "1", ...), appended to pArray in input order.  On
+// success the closing ']' has been consumed and true is returned.  On failure
+// false is returned and m_szErrMsg describes the problem; pArray may already
+// hold the elements parsed so far.
+bool KeyValuesJSONParser::ParseArray( KeyValues *pArray )
+{
+    Assert( m_eToken == '[' );
+    int nOpenDelimLine = m_nLine;
+    NextToken();
+    KeyValues *pLastChild = NULL;
+    int idx = 0;
+    while ( m_eToken != ']' )
+    {
+        // Parse error?  The lexer has already set the message.
+        if ( m_eToken == kToken_Err )
+            return false;
+        if ( m_eToken == kToken_EOF )
+        {
+            // Actually report the error at the line of the unmatched delimiter.
+            // There's no need to report the line number of the end of file, that is always
+            // useless.
+            m_nLine = nOpenDelimLine;
+            V_strcpy_safe( m_szErrMsg, "End of input was reached and '[' was not matched by ']'" );
+            return false;
+        }
+
+        // Set a dummy key name based on the index
+        char szKeyName[ 32 ];
+        V_sprintf_safe( szKeyName, "%d", idx );
+        ++idx;
+        KeyValues *pChildValue = new KeyValues( szKeyName );
+
+        // Recursively parse the value
+        if ( !ParseValue( pChildValue ) )
+        {
+            pChildValue->deleteThis();
+            return false;
+        }
+
+        // Add to parent.  From here on the child is reachable through pArray.
+        pArray->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild );
+        pLastChild = pChildValue;
+
+        // Handle a colon here specially.  If one appears, the odds are they
+        // are trying to put object-like data inside of an array
+        if ( m_eToken == ':' )
+        {
+            V_sprintf_safe( m_szErrMsg, "':' not expected inside an array.  ('[]' used when '{}' was intended?)" );
+            return false;
+        }
+
+        // Eat the comma, if there is one.  If no comma,
+        // then the other thing that could come next
+        // is the closing bracket to close the array
+        // NOTE: We are allowing the extra comma after the last item
+        if ( m_eToken == ',' )
+        {
+            NextToken();
+        }
+        else if ( m_eToken == kToken_Err )
+        {
+            // Lexer failed on whatever follows the element; keep its more
+            // specific message instead of overwriting it.
+            return false;
+        }
+        else if ( m_eToken != ']' )
+        {
+            V_sprintf_safe( m_szErrMsg, "%s not expected here.  Missing ',' or ']'?", GetTokenDebugText() );
+            return false;
+        }
+    }
+
+    // Eat closing ']'
+    NextToken();
+
+    // Success
+    return true;
+}
+
+// Parse the value that starts at the current token and store it into pValue.
+//
+// Objects and arrays recurse.  Scalars are stored with the matching KeyValues
+// setter and the token is then consumed.  Returns false with m_szErrMsg set
+// when the current token cannot start a value or an integer is out of range.
+bool KeyValuesJSONParser::ParseValue( KeyValues *pValue )
+{
+    // Containers consume their own tokens
+    if ( m_eToken == '{' )
+        return ParseObject( pValue );
+    if ( m_eToken == '[' )
+        return ParseArray( pValue );
+
+    // Lexer already failed; its message stands
+    if ( m_eToken == kToken_Err )
+        return false;
+
+    switch ( m_eToken )
+    {
+        case kToken_String:
+            pValue->SetString( NULL, m_vecTokenChars.Base() );
+            break;
+
+        case kToken_NumberInt:
+        {
+            const char *pszDigits = m_vecTokenChars.Base();
+            const bool bNegative = ( pszDigits[0] == '-' );
+            if ( bNegative )
+            {
+                const int64 nSigned = V_atoi64( pszDigits );
+                if ( nSigned < INT32_MIN )
+                {
+                    // !KLUDGE! KeyValues cannot support this!
+                    V_sprintf_safe( m_szErrMsg, "%s is out of range for KeyValues, which doesn't support signed 64-bit numbers", pszDigits );
+                    return false;
+                }
+                pValue->SetInt( NULL, (int)nSigned );
+            }
+            else
+            {
+                // Values that fit in a signed 32-bit int are stored as int,
+                // anything larger as an unsigned 64-bit value
+                const uint64 nUnsigned = V_atoui64( pszDigits );
+                if ( nUnsigned <= 0x7fffffffU )
+                    pValue->SetInt( NULL, (int)nUnsigned );
+                else
+                    pValue->SetUint64( NULL, nUnsigned );
+            }
+            break;
+        }
+
+        case kToken_NumberFloat:
+        {
+            const float flValue = V_atof( m_vecTokenChars.Base() );
+            pValue->SetFloat( NULL, flValue );
+            break;
+        }
+
+        case kToken_True:
+            pValue->SetBool( NULL, true );
+            break;
+
+        case kToken_False:
+            pValue->SetBool( NULL, false );
+            break;
+
+        case kToken_Null:
+            pValue->SetPtr( NULL, NULL );
+            break;
+
+        default:
+            // Anything else cannot begin a value
+            V_sprintf_safe( m_szErrMsg, "%s not expected here; missing value?", GetTokenDebugText() );
+            return false;
+    }
+
+    // Scalar stored; move past it
+    NextToken();
+    return true;
+}
+
+// Advance the lexer to the next token.
+//
+// Skips whitespace, newlines (counting lines in m_nLine) and '//' comments, then
+// sets m_eToken to the token found.  String and number tokens leave their text
+// in m_vecTokenChars.  At end of input m_eToken becomes kToken_EOF; on a bad
+// character it becomes kToken_Err and m_szErrMsg is filled in.  Once the token
+// is negative (a terminal state) further calls do nothing.
+void KeyValuesJSONParser::NextToken()
+{
+
+    // Already in terminal state?
+    if ( m_eToken < 0 )
+        return;
+
+    // Clear token
+    m_vecTokenChars.SetCount(0);
+
+    // Scan until we hit the end of input
+    while ( m_cur < m_end )
+    {
+
+        // Next character?
+        char c = *m_cur;
+        switch (c)
+        {
+            // Whitespace?  Eat it and keep parsing
+            case ' ':
+            case '\t':
+                ++m_cur;
+                break;
+
+            // Newline?  Eat it and advance line number
+            case '\n':
+            case '\r':
+                ++m_nLine;
+                ++m_cur;
+
+                // Eat \r\n or \n\r pair as a single character.
+                // ('\n' + '\r' - c) is the other member of the pair.
+                if ( m_cur < m_end && *m_cur == ( '\n' + '\r' - c ) )
+                    ++m_cur;
+                break;
+
+            // Single-character JSON token?
+            case ':':
+            case '{':
+            case '}':
+            case '[':
+            case ']':
+            case ',':
+                m_eToken = c;
+                ++m_cur;
+                return;
+
+            // String?
+            case '\"':
+            case '\'': // NOTE: We allow strings to be delimited by single quotes, which is not JSON compliant
+                ParseStringToken();
+                return;
+
+            case '-':
+            case '.':
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+            case '8':
+            case '9':
+                ParseNumberToken();
+                return;
+
+            // Literal "true"
+            case 't':
+                if ( m_cur + 4 <= m_end && m_cur[1] == 'r' && m_cur[2] == 'u' && m_cur[3] == 'e' )
+                {
+                    m_cur += 4;
+                    m_eToken = kToken_True;
+                    return;
+                }
+                goto unexpected_char;
+
+            // Literal "false"
+            case 'f':
+                if ( m_cur + 5 <= m_end && m_cur[1] == 'a' && m_cur[2] == 'l' && m_cur[3] == 's' && m_cur[4] == 'e' )
+                {
+                    m_cur += 5;
+                    m_eToken = kToken_False;
+                    return;
+                }
+                goto unexpected_char;
+
+            // Literal "null"
+            case 'n':
+                if ( m_cur + 4 <= m_end && m_cur[1] == 'u' && m_cur[2] == 'l' && m_cur[3] == 'l' )
+                {
+                    m_cur += 4;
+                    m_eToken = kToken_Null;
+                    return;
+                }
+                goto unexpected_char;
+
+            case '/':
+                // C++-style comment?
+                // Make sure a second character exists before looking at it;
+                // a '/' as the very last input byte must not read past m_end.
+                if ( m_cur + 1 < m_end && m_cur[1] == '/' )
+                {
+                    m_cur += 2;
+                    while ( m_cur < m_end && *m_cur != '\n' && *m_cur != '\r' )
+                        ++m_cur;
+                    // Leave newline as the next character, we'll handle it above
+                    break;
+                }
+                // |  fall
+                // |  through
+                // V
+
+            default:
+            unexpected_char:
+                if ( V_isprint(c) )
+                    V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x ('%c')", (uint8)c, c );
+                else
+                    V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x", (uint8)c );
+                m_eToken = kToken_Err;
+                return;
+        }
+    }
+
+    m_eToken = kToken_EOF;
+}
+
+// Lex a numeric token starting at m_cur (a '-', '.', or digit).
+//
+// On success m_eToken is kToken_NumberInt or kToken_NumberFloat and
+// m_vecTokenChars holds the NUL-terminated text of the number.  On failure
+// m_eToken is kToken_Err and m_szErrMsg describes the problem.
+//
+// Accepted form: optional '-', then either a whole part ("0" or a nonzero digit
+// followed by digits) and/or a fractional part starting with '.', then an
+// optional exponent.  A number may begin with '.', which strict JSON forbids.
+void KeyValuesJSONParser::ParseNumberToken()
+{
+    // Clear token
+    m_vecTokenChars.SetCount(0);
+
+    // Eat leading minus sign
+    if ( *m_cur == '-' )
+    {
+        m_vecTokenChars.AddToTail( '-' );
+        ++m_cur;
+    }
+
+    if ( m_cur >= m_end )
+    {
+        V_strcpy_safe( m_szErrMsg, "Unexpected EOF while parsing number" );
+        m_eToken = kToken_Err;
+        return;
+    }
+
+    // Whole part.  Only characters that are actually consumed are appended to
+    // the token: a leading '.' is left for the fraction code below (so it is
+    // not recorded twice), and anything else after a '-' is an error rather
+    // than being folded into the token text.
+    char c = *m_cur;
+    bool bHasWholePart = false;
+    if ( c == '0' )
+    {
+        // Leading 0 cannot be followed by any more digits, as per JSON spec (and to make sure nobody tries to parse octal).
+        // A following digit is rejected by the terminator check at the bottom.
+        m_vecTokenChars.AddToTail( c );
+        ++m_cur;
+        bHasWholePart = true;
+    }
+    else if ( c >= '1' && c <= '9' )
+    {
+        m_vecTokenChars.AddToTail( c );
+        ++m_cur;
+        bHasWholePart = true;
+
+        // Accumulate digits until we hit a non-digit
+        while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
+            m_vecTokenChars.AddToTail( *(m_cur++) );
+    }
+    else if ( c != '.' )
+    {
+        // We can only get here after a '-' that is not followed by a digit or '.'
+        if ( V_isprint(c) )
+            V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x ('%c')", (uint8)c, c );
+        else
+            V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x", (uint8)c );
+        m_eToken = kToken_Err;
+        return;
+    }
+    // else: strict JSON doesn't allow a number that starts with a decimal point, but we do
+
+    // Assume this is integral, unless we hit a decimal point and/or exponent
+    m_eToken = kToken_NumberInt;
+
+    // Fractional portion?
+    if ( m_cur < m_end && *m_cur == '.' )
+    {
+        m_eToken = kToken_NumberFloat;
+
+        // Eat decimal point
+        m_vecTokenChars.AddToTail( *(m_cur++) );
+
+        // Accumulate digits until we hit a non-digit
+        bool bHasFractionPart = false;
+        while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
+        {
+            m_vecTokenChars.AddToTail( *(m_cur++) );
+            bHasFractionPart = true;
+        }
+
+        // Make sure we aren't just a single '.'
+        if ( !bHasWholePart && !bHasFractionPart )
+        {
+            m_vecTokenChars.AddToTail(0);
+            V_sprintf_safe( m_szErrMsg, "Invalid number starting with '%s'", m_vecTokenChars.Base() );
+            m_eToken = kToken_Err;
+            return;
+        }
+    }
+
+    // Exponent?
+    if ( m_cur < m_end && ( *m_cur == 'e' || *m_cur == 'E' ) )
+    {
+        m_eToken = kToken_NumberFloat;
+
+        // Eat 'e'
+        m_vecTokenChars.AddToTail( *(m_cur++) );
+
+        // Optional sign
+        if ( m_cur < m_end && ( *m_cur == '-' || *m_cur == '+' ) )
+            m_vecTokenChars.AddToTail( *(m_cur++) );
+
+        // Accumulate digits until we hit a non-digit
+        bool bHasExponentDigit = false;
+        while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' )
+        {
+            m_vecTokenChars.AddToTail( *(m_cur++) );
+            bHasExponentDigit = true;
+        }
+        if ( !bHasExponentDigit )
+        {
+            V_strcpy_safe( m_szErrMsg, "Bad exponent in floating point number" );
+            m_eToken = kToken_Err;
+            return;
+        }
+    }
+
+    // OK, We have parsed a valid number.
+    // Terminate token
+    m_vecTokenChars.AddToTail( '\0' );
+
+    // EOF?  That's OK for now, at this lexical parsing level.  We'll handle the error
+    // at the higher parse level, when expecting a comma or closing delimiter
+    if ( m_cur >= m_end )
+        return;
+
+    // Is the next thing a valid character?  This is the most common case.
+    c = *m_cur;
+    if ( V_isspace( c ) || c == ',' || c == '}' || c == ']' || c == '/' )
+        return;
+
+    // Handle these guys as "tokens", to provide a slightly more meaningful error message
+    if ( c == '[' || c == '{' )
+        return;
+
+    // Anything else, treat the whole thing as an invalid numerical constant
+    if ( V_isprint(c) )
+        V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x ('%c')", (uint8)c, c );
+    else
+        V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x", (uint8)c );
+    m_eToken = kToken_Err;
+}
+
+// Lex a quoted string token.  m_cur must point at the opening quote (either
+// '"' or '\''); the same character closes the string.
+//
+// On success m_eToken is kToken_String and m_vecTokenChars holds the decoded,
+// NUL-terminated text.  Backslash escapes are decoded, and \uXXXX is converted
+// to UTF-8.  On failure (raw newline inside the string, bad escape, or end of
+// input) m_eToken is kToken_Err and m_szErrMsg describes the problem.
+void KeyValuesJSONParser::ParseStringToken()
+{
+    char cDelim = *(m_cur++);
+
+    while ( m_cur < m_end )
+    {
+        char c = *(m_cur++);
+        if ( c == '\r' || c == '\n' )
+        {
+            // Report the quote we were waiting for, same as the end-of-input message below
+            V_sprintf_safe( m_szErrMsg, "Hit end of line before closing quote (%c)", cDelim );
+            m_eToken = kToken_Err;
+            return;
+        }
+        if ( c == cDelim )
+        {
+            m_eToken = kToken_String;
+            m_vecTokenChars.AddToTail( '\0' );
+            return;
+        }
+
+        // Ordinary character?  Just append it
+        if ( c != '\\' )
+        {
+            m_vecTokenChars.AddToTail( c );
+            continue;
+        }
+
+        // Escaped character.
+        // End of input right after the backslash?  Let the loop exit and
+        // report the unterminated string below.
+        if ( m_cur >= m_end )
+            continue;
+
+        // Fetch the character that follows the backslash; that is the one
+        // which selects the escape.
+        c = *(m_cur++);
+
+        // Check table of allowed escape characters
+        switch (c)
+        {
+            case '\\':
+            case '/':
+            case '\'':
+            case '\"': m_vecTokenChars.AddToTail( c ); break;
+            case 'b': m_vecTokenChars.AddToTail( '\b' ); break;
+            case 'f': m_vecTokenChars.AddToTail( '\f' ); break;
+            case 'n': m_vecTokenChars.AddToTail( '\n' ); break;
+            case 'r': m_vecTokenChars.AddToTail( '\r' ); break;
+            case 't': m_vecTokenChars.AddToTail( '\t' ); break;
+
+            case 'u':
+            {
+
+                // Make sure are followed by exactly 4 hex digits
+                if ( m_cur + 4 > m_end || !V_isxdigit( m_cur[0] ) || !V_isxdigit( m_cur[1] ) || !V_isxdigit( m_cur[2] ) || !V_isxdigit( m_cur[3] ) )
+                {
+                    V_sprintf_safe( m_szErrMsg, "\\u must be followed by exactly 4 hex digits" );
+                    m_eToken = kToken_Err;
+                    return;
+                }
+
+                // Parse the codepoint, one hex digit (4 bits) at a time
+                uchar32 nCodePoint = 0;
+                for ( int n = 0 ; n < 4 ; ++n )
+                {
+                    nCodePoint <<= 4;
+                    char chHex = *(m_cur++);
+                    if ( chHex >= '0' && chHex <= '9' )
+                        nCodePoint += chHex - '0';
+                    else if ( chHex >= 'a' && chHex <= 'f' )
+                        nCodePoint += chHex + 0x0a - 'a';
+                    else if ( chHex >= 'A' && chHex <= 'F' )
+                        nCodePoint += chHex + 0x0a - 'A';
+                    else
+                        Assert( false ); // inconceivable, due to above
+                }
+
+                // Encode it in UTF-8
+                char utf8Encode[8];
+                int r = Q_UChar32ToUTF8( nCodePoint, utf8Encode );
+                if ( r < 0 || r > 4 )
+                {
+                    V_sprintf_safe( m_szErrMsg, "Invalid code point \\u%04x", nCodePoint );
+                    m_eToken = kToken_Err;
+                    return;
+                }
+                for ( int i = 0 ; i < r ; ++i )
+                    m_vecTokenChars.AddToTail( utf8Encode[i] );
+            } break;
+
+            default:
+                if ( V_isprint(c) )
+                    V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x ('\\%c')", (uint8)c, c );
+                else
+                    V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x", (uint8)c );
+                m_eToken = kToken_Err;
+                return;
+        }
+    }
+
+    V_sprintf_safe( m_szErrMsg, "Hit end of input before closing quote (%c)", cDelim );
+    m_eToken = kToken_Err;
+}
+
+// Return a short, human-readable description of the current token for use in
+// error messages.  The error token (and any other unknown value) has no
+// description: that case asserts and yields "<parse error>".
+const char *KeyValuesJSONParser::GetTokenDebugText()
+{
+    const char *pszDescription = NULL;
+    switch ( m_eToken )
+    {
+        // Lexer token kinds
+        case kToken_EOF:         pszDescription = "<EOF>"; break;
+        case kToken_String:      pszDescription = "<string>"; break;
+        case kToken_NumberInt:
+        case kToken_NumberFloat: pszDescription = "<number>"; break;
+        case kToken_True:        pszDescription = "'true'"; break;
+        case kToken_False:       pszDescription = "'false'"; break;
+        case kToken_Null:        pszDescription = "'null'"; break;
+
+        // Single-character punctuation tokens
+        case '{': pszDescription = "'{'"; break;
+        case '}': pszDescription = "'}'"; break;
+        case '[': pszDescription = "'['"; break;
+        case ']': pszDescription = "']'"; break;
+        case ':': pszDescription = "':'"; break;
+        case ',': pszDescription = "','"; break;
+    }
+    if ( pszDescription != NULL )
+        return pszDescription;
+
+    // We shouldn't ever need to ask for a debug string for the error token,
+    // and anything else is an error
+    Assert( false );
+    return "<parse error>";
+}
+
+#ifdef _DEBUG
+
+// Test helper: pszData must parse successfully.
+static void JSONTest_ParseValid( const char *pszData )
+{
+    KeyValuesJSONParser parser( pszData );
+    KeyValues *pFile = parser.ParseFile();
+    Assert( pFile );
+
+    // Don't dereference NULL if the assert above fired and execution continued
+    if ( pFile )
+        pFile->deleteThis();
+}
+
+static void JSONTest_ParseInvalid( const char *pszData, const char *pszExpectedErrMsgSnippet, int nExpectedFailureLine )
+{
+ KeyValuesJSONParser parser( pszData );
+ KeyValues *pFile = parser.ParseFile();
+ Assert( pFile == NULL );
+ Assert( V_stristr( parser.m_szErrMsg, pszExpectedErrMsgSnippet ) != NULL );
+ Assert( parser.m_nLine == nExpectedFailureLine );
+}
+
+// Debug-only self test for KeyValuesJSONParser.  Feeds the parser a few
+// documents and asserts on whether each one parses.
+void TestKeyValuesJSONParser()
+{
+ // Smallest valid document: an empty object
+ JSONTest_ParseValid( "{}" );
+ // A larger document covering strings, integers, floats, literals, escapes,
+ // nested arrays/objects, '//' comments and trailing commas
+ JSONTest_ParseValid( R"JSON({
+ "key": "string_value",
+ "pos_int32": 123,
+ "pos_int64": 123456789012,
+ "neg_int32": -456,
+ "float": -45.23,
+ "pos_exponent": 1e30,
+ "neg_exponent": 1e-16,
+ "decimal_and_exponent": 1.e+30,
+ "no_leading_zero": .7, // we support this, even though strict JSON says it's no good
+ "zero": 0,
+ "true_value": true,
+ "false_value": false,
+ "null_value": null,
+ "with_escaped": "\r \t \n",
+ "unicode": "\u1234 \\u12f3",
+ "array_of_ints": [ 1, 2, 3, -45 ],
+ "empty_array": [],
+ "array_with_stuff_inside": [
+ {}, // this is a comment.
+ [ 0.45, {}, "hello!" ],
+ { "id": 0 },
+ // Trailing comma above. Comment here
+ ],
+ })JSON" );
+ // Object never closed: expect an error mentioning "missing", reported on line 1
+ JSONTest_ParseInvalid( "{ \"key\": 123", "missing", 1 );
+ // Stray 'f' suffix on a number: expect an error mentioning "number", reported on line 1
+ JSONTest_ParseInvalid( "{ \"key\": 123.4f }", "number", 1 );
+}
+
+#endif