diff options
Diffstat (limited to 'tier1/keyvaluesjson.cpp')
| -rw-r--r-- | tier1/keyvaluesjson.cpp | 714 |
1 files changed, 714 insertions, 0 deletions
diff --git a/tier1/keyvaluesjson.cpp b/tier1/keyvaluesjson.cpp new file mode 100644 index 0000000..7d3dabf --- /dev/null +++ b/tier1/keyvaluesjson.cpp @@ -0,0 +1,714 @@ +//========= Copyright Valve Corporation, All rights reserved. =================// +// +// Read JSON-formatted data into KeyValues +// +//=============================================================================// + +#include "tier1/keyvaluesjson.h" +#include "tier1/utlbuffer.h" +#include "tier1/strtools.h" +#include <stdint.h> // INT32_MIN defn + +KeyValuesJSONParser::KeyValuesJSONParser( const CUtlBuffer &buf ) +{ + Init( (const char *)buf.Base(), buf.TellPut() ); +} + +KeyValuesJSONParser::KeyValuesJSONParser( const char *pszText, int cbSize ) +{ + Init( pszText, cbSize >= 0 ? cbSize : V_strlen(pszText) ); +} + +KeyValuesJSONParser::~KeyValuesJSONParser() {} + +void KeyValuesJSONParser::Init( const char *pszText, int cbSize ) +{ + m_szErrMsg[0] = '\0'; + m_nLine = 1; + m_cur = pszText; + m_end = pszText+cbSize; + + m_eToken = kToken_Null; + NextToken(); +} + +KeyValues *KeyValuesJSONParser::ParseFile() +{ + // A valid JSON object should contain a single object, surrounded by curly braces. + if ( m_eToken == kToken_EOF ) + { + V_sprintf_safe( m_szErrMsg, "Input contains no data" ); + return NULL; + } + if ( m_eToken == kToken_Err ) + return NULL; + if ( m_eToken == '{' ) + { + + // Parse the the entire file as one big object + KeyValues *pResult = new KeyValues(""); + if ( !ParseObject( pResult ) ) + { + pResult->deleteThis(); + return NULL; + } + if ( m_eToken == kToken_EOF ) + return pResult; + pResult->deleteThis(); + } + V_sprintf_safe( m_szErrMsg, "%s not expected here. A valid JSON document should be a single object, which begins with '{' and ends with '}'", GetTokenDebugText() ); + return NULL; +} + +bool KeyValuesJSONParser::ParseObject( KeyValues *pObject ) +{ + Assert( m_eToken == '{' ); + int nOpenDelimLine = m_nLine; + NextToken(); + KeyValues *pLastChild = NULL; + while ( m_eToken != '}' ) + { + // Parse error? + if ( m_eToken == kToken_Err ) + return false; + if ( m_eToken == kToken_EOF ) + { + // Actually report the error at the line of the unmatched delimiter. + // There's no need to report the line number of the end of file, that is always + // useless. + m_nLine = nOpenDelimLine; + V_strcpy_safe( m_szErrMsg, "End of input was reached and '{' was not matched by '}'" ); + return false; + } + + // It must be a string, for the key name + if ( m_eToken != kToken_String ) + { + V_sprintf_safe( m_szErrMsg, "%s not expected here; expected string for key name or '}'", GetTokenDebugText() ); + return false; + } + + KeyValues *pChildValue = new KeyValues( m_vecTokenChars.Base() ); + NextToken(); + + // Expect and eat colon + if ( m_eToken != ':' ) + { + V_sprintf_safe( m_szErrMsg, "%s not expected here. Missing ':'?", GetTokenDebugText() ); + pChildValue->deleteThis(); + return false; + } + NextToken(); + + // Recursively parse the value + if ( !ParseValue( pChildValue ) ) + { + pChildValue->deleteThis(); + return false; + } + + // Add to parent. + pObject->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild ); + pLastChild = pChildValue; + + // Eat the comma, if there is one. If no comma, + // then the other thing that could come next + // is the closing brace to close the object + // NOTE: We are allowing the extra comma after the last item + if ( m_eToken == ',' ) + { + NextToken(); + } + else if ( m_eToken != '}' ) + { + V_sprintf_safe( m_szErrMsg, "%s not expected here. Missing ',' or '}'?", GetTokenDebugText() ); + return false; + } + } + + // Eat closing '}' + NextToken(); + + // Success + return true; +} + +bool KeyValuesJSONParser::ParseArray( KeyValues *pArray ) +{ + Assert( m_eToken == '[' ); + int nOpenDelimLine = m_nLine; + NextToken(); + KeyValues *pLastChild = NULL; + int idx = 0; + while ( m_eToken != ']' ) + { + // Parse error? + if ( m_eToken == kToken_Err ) + return false; + if ( m_eToken == kToken_EOF ) + { + // Actually report the error at the line of the unmatched delimiter. + // There's no need to report the line number of the end of file, that is always + // useless. + m_nLine = nOpenDelimLine; + V_strcpy_safe( m_szErrMsg, "End of input was reached and '[' was not matched by ']'" ); + return false; + } + + // Set a dummy key name based on the index + char szKeyName[ 32 ]; + V_sprintf_safe( szKeyName, "%d", idx ); + ++idx; + KeyValues *pChildValue = new KeyValues( szKeyName ); + + // Recursively parse the value + if ( !ParseValue( pChildValue ) ) + { + pChildValue->deleteThis(); + return false; + } + + // Add to parent. + pArray->AddSubkeyUsingKnownLastChild( pChildValue, pLastChild ); + pLastChild = pChildValue; + + // Handle a colon here specially. If one appears, the odds are they + // are trying to put object-like data inside of an array + if ( m_eToken == ':' ) + { + V_sprintf_safe( m_szErrMsg, "':' not expected inside an array. ('[]' used when '{}' was intended?)" ); + return false; + } + + // Eat the comma, if there is one. If no comma, + // then the other thing that could come next + // is the closing brace to close the object + // NOTE: We are allowing the extra comma after the last item + if ( m_eToken == ',' ) + { + NextToken(); + } + else if ( m_eToken != ']' ) + { + V_sprintf_safe( m_szErrMsg, "%s not expected here. Missing ',' or ']'?", GetTokenDebugText() ); + return false; + } + } + + // Eat closing ']' + NextToken(); + + // Success + return true; +} + +bool KeyValuesJSONParser::ParseValue( KeyValues *pValue ) +{ + switch ( m_eToken ) + { + case '{': return ParseObject( pValue ); + case '[': return ParseArray( pValue ); + case kToken_String: + pValue->SetString( NULL, m_vecTokenChars.Base() ); + NextToken(); + return true; + + case kToken_NumberInt: + { + const char *pszNum = m_vecTokenChars.Base(); + + // Negative? + if ( *pszNum == '-' ) + { + int64 val64 = V_atoi64( pszNum ); + if ( val64 < INT32_MIN ) + { + // !KLUDGE! KeyValues cannot support this! + V_sprintf_safe( m_szErrMsg, "%s is out of range for KeyValues, which doesn't support signed 64-bit numbers", pszNum ); + return false; + } + + pValue->SetInt( NULL, (int)val64 ); + } + else + { + uint64 val64 = V_atoui64( pszNum ); + if ( val64 > 0x7fffffffU ) + { + pValue->SetUint64( NULL, val64 ); + } + else + { + pValue->SetInt( NULL, (int)val64 ); + } + } + NextToken(); + return true; + } + + case kToken_NumberFloat: + { + float f = V_atof( m_vecTokenChars.Base() ); + pValue->SetFloat( NULL, f ); + NextToken(); + return true; + } + + case kToken_True: + pValue->SetBool( NULL, true ); + NextToken(); + return true; + + case kToken_False: + pValue->SetBool( NULL, false ); + NextToken(); + return true; + + case kToken_Null: + pValue->SetPtr( NULL, NULL ); + NextToken(); + return true; + + case kToken_Err: + return false; + } + + V_sprintf_safe( m_szErrMsg, "%s not expected here; missing value?", GetTokenDebugText() ); + return false; +} + +void KeyValuesJSONParser::NextToken() +{ + + // Already in terminal state? + if ( m_eToken < 0 ) + return; + + // Clear token + m_vecTokenChars.SetCount(0); + + // Scan until we hit the end of input + while ( m_cur < m_end ) + { + + // Next character? + char c = *m_cur; + switch (c) + { + // Whitespace? Eat it and keep parsing + case ' ': + case '\t': + ++m_cur; + break; + + // Newline? Eat it and advance line number + case '\n': + case '\r': + ++m_nLine; + ++m_cur; + + // Eat \r\n or \n\r pair as a single character + if ( m_cur < m_end && *m_cur == ( '\n' + '\r' - c ) ) + ++m_cur; + break; + + // Single-character JSON token? + case ':': + case '{': + case '}': + case '[': + case ']': + case ',': + m_eToken = c; + ++m_cur; + return; + + // String? + case '\"': + case '\'': // NOTE: We allow strings to be delimited by single quotes, which is not JSON compliant + ParseStringToken(); + return; + + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + ParseNumberToken(); + return; + + // Literal "true" + case 't': + if ( m_cur + 4 <= m_end && m_cur[1] == 'r' && m_cur[2] == 'u' && m_cur[3] == 'e' ) + { + m_cur += 4; + m_eToken = kToken_True; + return; + } + goto unexpected_char; + + // Literal "false" + case 'f': + if ( m_cur + 5 <= m_end && m_cur[1] == 'a' && m_cur[2] == 'l' && m_cur[3] == 's' && m_cur[4] == 'e' ) + { + m_cur += 5; + m_eToken = kToken_False; + return; + } + goto unexpected_char; + + // Literal "null" + case 'n': + if ( m_cur + 4 <= m_end && m_cur[1] == 'u' && m_cur[2] == 'l' && m_cur[3] == 'l' ) + { + m_cur += 4; + m_eToken = kToken_Null; + return; + } + goto unexpected_char; + + case '/': + // C++-style comment? + if ( m_cur < m_end && m_cur[1] == '/' ) + { + m_cur += 2; + while ( m_cur < m_end && *m_cur != '\n' && *m_cur != '\r' ) + ++m_cur; + // Leave newline as the next character, we'll handle it above + break; + } + // | fall + // | through + // V + + default: + unexpected_char: + if ( V_isprint(c) ) + V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x ('%c')", (uint8)c, c ); + else + V_sprintf_safe( m_szErrMsg, "Unexpected character 0x%02x", (uint8)c ); + m_eToken = kToken_Err; + return; + } + } + + m_eToken = kToken_EOF; +} + +void KeyValuesJSONParser::ParseNumberToken() +{ + // Clear token + m_vecTokenChars.SetCount(0); + + // Eat leading minus sign + if ( *m_cur == '-' ) + { + m_vecTokenChars.AddToTail( '-' ); + ++m_cur; + } + + if ( m_cur >= m_end ) + { + V_strcpy_safe( m_szErrMsg, "Unexpected EOF while parsing number" ); + m_eToken = kToken_Err; + return; + } + + char c = *m_cur; + m_vecTokenChars.AddToTail( c ); + bool bHasWholePart = false; + switch ( c ) + { + case '0': + // Leading 0 cannot be followed by any more digits, as per JSON spec (and to make sure nobody tries to parse octal). + ++m_cur; + bHasWholePart = true; + break; + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + bHasWholePart = true; + ++m_cur; + + // Accumulate digits until we hit a non-digit + while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' ) + m_vecTokenChars.AddToTail( *(m_cur++) ); + break; + + case '.': + // strict JSON doesn't allow a number that starts with a decimal point, but we do + break; + } + + // Assume this is integral, unless we hit a decimal point and/or exponent + m_eToken = kToken_NumberInt; + + // Fractional portion? + if ( m_cur < m_end && *m_cur == '.' ) + { + m_eToken = kToken_NumberFloat; + + // Eat decimal point + m_vecTokenChars.AddToTail( *(m_cur++) ); + + // Accumulate digits until we hit a non-digit + bool bHasFractionPart = false; + while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' ) + { + m_vecTokenChars.AddToTail( *(m_cur++) ); + bHasFractionPart = true; + } + + // Make sure we aren't just a single '.' + if ( !bHasWholePart && !bHasFractionPart ) + { + m_vecTokenChars.AddToTail(0); + V_sprintf_safe( m_szErrMsg, "Invalid number starting with '%s'", m_vecTokenChars.Base() ); + m_eToken = kToken_Err; + return; + } + } + + // Exponent? + if ( m_cur < m_end && ( *m_cur == 'e' || *m_cur == 'E' ) ) + { + m_eToken = kToken_NumberFloat; + + // Eat 'e' + m_vecTokenChars.AddToTail( *(m_cur++) ); + + // Optional sign + if ( m_cur < m_end && ( *m_cur == '-' || *m_cur == '+' ) ) + m_vecTokenChars.AddToTail( *(m_cur++) ); + + // Accumulate digits until we hit a non-digit + bool bHasExponentDigit = false; + while ( m_cur < m_end && *m_cur >= '0' && *m_cur <= '9' ) + { + m_vecTokenChars.AddToTail( *(m_cur++) ); + bHasExponentDigit = true; + } + if ( !bHasExponentDigit ) + { + V_strcpy_safe( m_szErrMsg, "Bad exponent in floating point number" ); + m_eToken = kToken_Err; + return; + } + } + + // OK, We have parsed a valid number. + // Terminate token + m_vecTokenChars.AddToTail( '\0' ); + + // EOF? That's OK for now, at this lexical parsing level. We'll handle the error + // at the higher parse level, when expecting a comma or closing delimiter + if ( m_cur >= m_end ) + return; + + // Is the next thing a valid character? This is the most common case. + c = *m_cur; + if ( V_isspace( c ) || c == ',' || c == '}' || c == ']' || c == '/' ) + return; + + // Handle these guys as "tokens", to provide a slightly more meaningful error message + if ( c == '[' || c == '{' ) + return; + + // Anything else, treat the whole thing as an invalid numerical constant + if ( V_isprint(c) ) + V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x ('%c')", (uint8)c, c ); + else + V_sprintf_safe( m_szErrMsg, "Number contains invalid character 0x%02x", (uint8)c ); + m_eToken = kToken_Err; +} + +void KeyValuesJSONParser::ParseStringToken() +{ + char cDelim = *(m_cur++); + + while ( m_cur < m_end ) + { + char c = *(m_cur++); + if ( c == '\r' || c == '\n' ) + { + V_sprintf_safe( m_szErrMsg, "Hit end of line before closing quote (%c)", c ); + m_eToken = kToken_Err; + return; + } + if ( c == cDelim ) + { + m_eToken = kToken_String; + m_vecTokenChars.AddToTail( '\0' ); + return; + } + + // Ordinary character? Just append it + if ( c != '\\' ) + { + m_vecTokenChars.AddToTail( c ); + continue; + } + + // Escaped character. + // End of string? We'll handle it above + if ( m_cur >= m_end ) + continue; + + // Check table of allowed escape characters + switch (c) + { + case '\\': + case '/': + case '\'': + case '\"': m_vecTokenChars.AddToTail( c ); break; + case 'b': m_vecTokenChars.AddToTail( '\b' ); break; + case 'f': m_vecTokenChars.AddToTail( '\f' ); break; + case 'n': m_vecTokenChars.AddToTail( '\n' ); break; + case 'r': m_vecTokenChars.AddToTail( '\r' ); break; + case 't': m_vecTokenChars.AddToTail( '\t' ); break; + + case 'u': + { + + // Make sure are followed by exactly 4 hex digits + if ( m_cur + 4 > m_end || !V_isxdigit( m_cur[0] ) || !V_isxdigit( m_cur[1] ) || !V_isxdigit( m_cur[2] ) || !V_isxdigit( m_cur[3] ) ) + { + V_sprintf_safe( m_szErrMsg, "\\u must be followed by exactly 4 hex digits" ); + m_eToken = kToken_Err; + return; + } + + // Parse the codepoint + uchar32 nCodePoint = 0; + for ( int n = 0 ; n < 4 ; ++n ) + { + nCodePoint <<= 4; + char chHex = *(m_cur++); + if ( chHex >= '0' && chHex <= '9' ) + nCodePoint += chHex - '0'; + else if ( chHex >= 'a' && chHex <= 'a' ) + nCodePoint += chHex + 0x0a - 'a'; + else if ( chHex >= 'A' && chHex <= 'A' ) + nCodePoint += chHex + 0x0a - 'A'; + else + Assert( false ); // inconceivable, due to above + } + + // Encode it in UTF-8 + char utf8Encode[8]; + int r = Q_UChar32ToUTF8( nCodePoint, utf8Encode ); + if ( r < 0 || r > 4 ) + { + V_sprintf_safe( m_szErrMsg, "Invalid code point \\u%04x", nCodePoint ); + m_eToken = kToken_Err; + return; + } + for ( int i = 0 ; i < r ; ++i ) + m_vecTokenChars.AddToTail( utf8Encode[i] ); + } break; + + default: + if ( V_isprint(c) ) + V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x ('\\%c')", (uint8)c, c ); + else + V_sprintf_safe( m_szErrMsg, "Invalid escape character 0x%02x", (uint8)c ); + m_eToken = kToken_Err; + return; + } + } + + V_sprintf_safe( m_szErrMsg, "Hit end of input before closing quote (%c)", cDelim ); + m_eToken = kToken_Err; +} + +const char *KeyValuesJSONParser::GetTokenDebugText() +{ + switch ( m_eToken ) + { + case kToken_EOF: return "<EOF>"; + case kToken_String: return "<string>"; + case kToken_NumberInt: + case kToken_NumberFloat: return "<number>"; + case kToken_True: return "'true'"; + case kToken_False: return "'false'"; + case kToken_Null: return "'null'"; + case '{': return "'{'"; + case '}': return "'}'"; + case '[': return "'['"; + case ']': return "']'"; + case ':': return "':'"; + case ',': return "','"; + } + + // We shouldn't ever need to ask for a debug string for the error token, + // and anything else is an error + Assert( false ); + return "<parse error>"; +} + +#ifdef _DEBUG + +static void JSONTest_ParseValid( const char *pszData ) +{ + KeyValuesJSONParser parser( pszData ); + KeyValues *pFile = parser.ParseFile(); + Assert( pFile ); + pFile->deleteThis(); +} + +static void JSONTest_ParseInvalid( const char *pszData, const char *pszExpectedErrMsgSnippet, int nExpectedFailureLine ) +{ + KeyValuesJSONParser parser( pszData ); + KeyValues *pFile = parser.ParseFile(); + Assert( pFile == NULL ); + Assert( V_stristr( parser.m_szErrMsg, pszExpectedErrMsgSnippet ) != NULL ); + Assert( parser.m_nLine == nExpectedFailureLine ); +} + +void TestKeyValuesJSONParser() +{ + JSONTest_ParseValid( "{}" ); + JSONTest_ParseValid( R"JSON({ + "key": "string_value", + "pos_int32": 123, + "pos_int64": 123456789012, + "neg_int32": -456, + "float": -45.23, + "pos_exponent": 1e30, + "neg_exponent": 1e-16, + "decimal_and_exponent": 1.e+30, + "no_leading_zero": .7, // we support this, even though strict JSON says it's no good + "zero": 0, + "true_value": true, + "false_value": false, + "null_value": null, + "with_escaped": "\r \t \n", + "unicode": "\u1234 \\u12f3", + "array_of_ints": [ 1, 2, 3, -45 ], + "empty_array": [], + "array_with_stuff_inside": [ + {}, // this is a comment. + [ 0.45, {}, "hello!" ], + { "id": 0 }, + // Trailing comma above. Comment here + ], + })JSON" ); + JSONTest_ParseInvalid( "{ \"key\": 123", "missing", 1 ); + JSONTest_ParseInvalid( "{ \"key\": 123.4f }", "number", 1 ); +} + +#endif |