diff options
Diffstat (limited to 'toolkit/components/jsoncpp/src/lib_json/json_reader.cpp')
-rw-r--r-- | toolkit/components/jsoncpp/src/lib_json/json_reader.cpp | 1992 |
1 files changed, 1992 insertions, 0 deletions
diff --git a/toolkit/components/jsoncpp/src/lib_json/json_reader.cpp b/toolkit/components/jsoncpp/src/lib_json/json_reader.cpp new file mode 100644 index 0000000000..a6a3f4e30d --- /dev/null +++ b/toolkit/components/jsoncpp/src/lib_json/json_reader.cpp @@ -0,0 +1,1992 @@ +// Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors +// Copyright (C) 2016 InfoTeCS JSC. All rights reserved. +// Distributed under MIT license, or public domain if desired and +// recognized in your jurisdiction. +// See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE + +#if !defined(JSON_IS_AMALGAMATION) +#include "json_tool.h" +#include <json/assertions.h> +#include <json/reader.h> +#include <json/value.h> +#endif // if !defined(JSON_IS_AMALGAMATION) +#include <algorithm> +#include <cassert> +#include <cstring> +#include <iostream> +#include <istream> +#include <limits> +#include <memory> +#include <set> +#include <sstream> +#include <utility> + +#include <cstdio> +#if __cplusplus >= 201103L + +#if !defined(sscanf) +#define sscanf std::sscanf +#endif + +#endif //__cplusplus + +#if defined(_MSC_VER) +#if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES) +#define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1 +#endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES +#endif //_MSC_VER + +#if defined(_MSC_VER) +// Disable warning about strdup being deprecated. +#pragma warning(disable : 4996) +#endif + +// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile +// time to change the stack limit +#if !defined(JSONCPP_DEPRECATED_STACK_LIMIT) +#define JSONCPP_DEPRECATED_STACK_LIMIT 1000 +#endif + +static size_t const stackLimit_g = + JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue() + +namespace Json { + +#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520) +using CharReaderPtr = std::unique_ptr<CharReader>; +#else +using CharReaderPtr = std::auto_ptr<CharReader>; +#endif + +// Implementation of class Features +// //////////////////////////////// + +Features::Features() = default; + +Features Features::all() { return {}; } + +Features Features::strictMode() { + Features features; + features.allowComments_ = false; + features.strictRoot_ = true; + features.allowDroppedNullPlaceholders_ = false; + features.allowNumericKeys_ = false; + return features; +} + +// Implementation of class Reader +// //////////////////////////////// + +bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) { + return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; }); +} + +// Class Reader +// ////////////////////////////////////////////////////////////////// + +Reader::Reader() : features_(Features::all()) {} + +Reader::Reader(const Features& features) : features_(features) {} + +bool Reader::parse(const std::string& document, Value& root, + bool collectComments) { + document_.assign(document.begin(), document.end()); + const char* begin = document_.c_str(); + const char* end = begin + document_.length(); + return parse(begin, end, root, collectComments); +} + +bool Reader::parse(std::istream& is, Value& root, bool collectComments) { + // std::istream_iterator<char> begin(is); + // std::istream_iterator<char> end; + // Those would allow streamed input from a file, if parse() were a + // template function. + + // Since String is reference-counted, this at least does not + // create an extra copy. + String doc(std::istreambuf_iterator<char>(is), {}); + return parse(doc.data(), doc.data() + doc.size(), root, collectComments); +} + +bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root, + bool collectComments) { + if (!features_.allowComments_) { + collectComments = false; + } + + begin_ = beginDoc; + end_ = endDoc; + collectComments_ = collectComments; + current_ = begin_; + lastValueEnd_ = nullptr; + lastValue_ = nullptr; + commentsBefore_.clear(); + errors_.clear(); + while (!nodes_.empty()) + nodes_.pop(); + nodes_.push(&root); + + bool successful = readValue(); + Token token; + skipCommentTokens(token); + if (collectComments_ && !commentsBefore_.empty()) + root.setComment(commentsBefore_, commentAfter); + if (features_.strictRoot_) { + if (!root.isArray() && !root.isObject()) { + // Set error location to start of doc, ideally should be first token found + // in doc + token.type_ = tokenError; + token.start_ = beginDoc; + token.end_ = endDoc; + addError( + "A valid JSON document must be either an array or an object value.", + token); + return false; + } + } + return successful; +} + +bool Reader::readValue() { + // readValue() may call itself only if it calls readObject() or ReadArray(). + // These methods execute nodes_.push() just before and nodes_.pop)() just + // after calling readValue(). parse() executes one nodes_.push(), so > instead + // of >=. + if (nodes_.size() > stackLimit_g) + throwRuntimeError("Exceeded stackLimit in readValue()."); + + Token token; + skipCommentTokens(token); + bool successful = true; + + if (collectComments_ && !commentsBefore_.empty()) { + currentValue().setComment(commentsBefore_, commentBefore); + commentsBefore_.clear(); + } + + switch (token.type_) { + case tokenObjectBegin: + successful = readObject(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenArrayBegin: + successful = readArray(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenNumber: + successful = decodeNumber(token); + break; + case tokenString: + successful = decodeString(token); + break; + case tokenTrue: { + Value v(true); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenFalse: { + Value v(false); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenNull: { + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenArraySeparator: + case tokenObjectEnd: + case tokenArrayEnd: + if (features_.allowDroppedNullPlaceholders_) { + // "Un-read" the current token and mark the current value as a null + // token. + current_--; + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(current_ - begin_ - 1); + currentValue().setOffsetLimit(current_ - begin_); + break; + } // Else, fall through... + default: + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return addError("Syntax error: value, object or array expected.", token); + } + + if (collectComments_) { + lastValueEnd_ = current_; + lastValue_ = ¤tValue(); + } + + return successful; +} + +void Reader::skipCommentTokens(Token& token) { + if (features_.allowComments_) { + do { + readToken(token); + } while (token.type_ == tokenComment); + } else { + readToken(token); + } +} + +bool Reader::readToken(Token& token) { + skipSpaces(); + token.start_ = current_; + Char c = getNextChar(); + bool ok = true; + switch (c) { + case '{': + token.type_ = tokenObjectBegin; + break; + case '}': + token.type_ = tokenObjectEnd; + break; + case '[': + token.type_ = tokenArrayBegin; + break; + case ']': + token.type_ = tokenArrayEnd; + break; + case '"': + token.type_ = tokenString; + ok = readString(); + break; + case '/': + token.type_ = tokenComment; + ok = readComment(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '-': + token.type_ = tokenNumber; + readNumber(); + break; + case 't': + token.type_ = tokenTrue; + ok = match("rue", 3); + break; + case 'f': + token.type_ = tokenFalse; + ok = match("alse", 4); + break; + case 'n': + token.type_ = tokenNull; + ok = match("ull", 3); + break; + case ',': + token.type_ = tokenArraySeparator; + break; + case ':': + token.type_ = tokenMemberSeparator; + break; + case 0: + token.type_ = tokenEndOfStream; + break; + default: + ok = false; + break; + } + if (!ok) + token.type_ = tokenError; + token.end_ = current_; + return ok; +} + +void Reader::skipSpaces() { + while (current_ != end_) { + Char c = *current_; + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') + ++current_; + else + break; + } +} + +bool Reader::match(const Char* pattern, int patternLength) { + if (end_ - current_ < patternLength) + return false; + int index = patternLength; + while (index--) + if (current_[index] != pattern[index]) + return false; + current_ += patternLength; + return true; +} + +bool Reader::readComment() { + Location commentBegin = current_ - 1; + Char c = getNextChar(); + bool successful = false; + if (c == '*') + successful = readCStyleComment(); + else if (c == '/') + successful = readCppStyleComment(); + if (!successful) + return false; + + if (collectComments_) { + CommentPlacement placement = commentBefore; + if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) { + if (c != '*' || !containsNewLine(commentBegin, current_)) + placement = commentAfterOnSameLine; + } + + addComment(commentBegin, current_, placement); + } + return true; +} + +String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) { + String normalized; + normalized.reserve(static_cast<size_t>(end - begin)); + Reader::Location current = begin; + while (current != end) { + char c = *current++; + if (c == '\r') { + if (current != end && *current == '\n') + // convert dos EOL + ++current; + // convert Mac EOL + normalized += '\n'; + } else { + normalized += c; + } + } + return normalized; +} + +void Reader::addComment(Location begin, Location end, + CommentPlacement placement) { + assert(collectComments_); + const String& normalized = normalizeEOL(begin, end); + if (placement == commentAfterOnSameLine) { + assert(lastValue_ != nullptr); + lastValue_->setComment(normalized, placement); + } else { + commentsBefore_ += normalized; + } +} + +bool Reader::readCStyleComment() { + while ((current_ + 1) < end_) { + Char c = getNextChar(); + if (c == '*' && *current_ == '/') + break; + } + return getNextChar() == '/'; +} + +bool Reader::readCppStyleComment() { + while (current_ != end_) { + Char c = getNextChar(); + if (c == '\n') + break; + if (c == '\r') { + // Consume DOS EOL. It will be normalized in addComment. + if (current_ != end_ && *current_ == '\n') + getNextChar(); + // Break on Moc OS 9 EOL. + break; + } + } + return true; +} + +void Reader::readNumber() { + Location p = current_; + char c = '0'; // stopgap for already consumed character + // integral part + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + // fractional part + if (c == '.') { + c = (current_ = p) < end_ ? *p++ : '\0'; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + } + // exponential part + if (c == 'e' || c == 'E') { + c = (current_ = p) < end_ ? *p++ : '\0'; + if (c == '+' || c == '-') + c = (current_ = p) < end_ ? *p++ : '\0'; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + } +} + +bool Reader::readString() { + Char c = '\0'; + while (current_ != end_) { + c = getNextChar(); + if (c == '\\') + getNextChar(); + else if (c == '"') + break; + } + return c == '"'; +} + +bool Reader::readObject(Token& token) { + Token tokenName; + String name; + Value init(objectValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(token.start_ - begin_); + while (readToken(tokenName)) { + bool initialTokenOk = true; + while (tokenName.type_ == tokenComment && initialTokenOk) + initialTokenOk = readToken(tokenName); + if (!initialTokenOk) + break; + if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object + return true; + name.clear(); + if (tokenName.type_ == tokenString) { + if (!decodeString(tokenName, name)) + return recoverFromError(tokenObjectEnd); + } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) { + Value numberName; + if (!decodeNumber(tokenName, numberName)) + return recoverFromError(tokenObjectEnd); + name = numberName.asString(); + } else { + break; + } + + Token colon; + if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { + return addErrorAndRecover("Missing ':' after object member name", colon, + tokenObjectEnd); + } + Value& value = currentValue()[name]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenObjectEnd); + + Token comma; + if (!readToken(comma) || + (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && + comma.type_ != tokenComment)) { + return addErrorAndRecover("Missing ',' or '}' in object declaration", + comma, tokenObjectEnd); + } + bool finalizeTokenOk = true; + while (comma.type_ == tokenComment && finalizeTokenOk) + finalizeTokenOk = readToken(comma); + if (comma.type_ == tokenObjectEnd) + return true; + } + return addErrorAndRecover("Missing '}' or object member name", tokenName, + tokenObjectEnd); +} + +bool Reader::readArray(Token& token) { + Value init(arrayValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(token.start_ - begin_); + skipSpaces(); + if (current_ != end_ && *current_ == ']') // empty array + { + Token endArray; + readToken(endArray); + return true; + } + int index = 0; + for (;;) { + Value& value = currentValue()[index++]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenArrayEnd); + + Token currentToken; + // Accept Comment after last item in the array. + ok = readToken(currentToken); + while (currentToken.type_ == tokenComment && ok) { + ok = readToken(currentToken); + } + bool badTokenType = (currentToken.type_ != tokenArraySeparator && + currentToken.type_ != tokenArrayEnd); + if (!ok || badTokenType) { + return addErrorAndRecover("Missing ',' or ']' in array declaration", + currentToken, tokenArrayEnd); + } + if (currentToken.type_ == tokenArrayEnd) + break; + } + return true; +} + +bool Reader::decodeNumber(Token& token) { + Value decoded; + if (!decodeNumber(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool Reader::decodeNumber(Token& token, Value& decoded) { + // Attempts to parse the number as an integer. If the number is + // larger than the maximum supported value of an integer then + // we decode the number as a double. + Location current = token.start_; + bool isNegative = *current == '-'; + if (isNegative) + ++current; + // TODO: Help the compiler do the div and mod at compile time or get rid of + // them. + Value::LargestUInt maxIntegerValue = + isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1 + : Value::maxLargestUInt; + Value::LargestUInt threshold = maxIntegerValue / 10; + Value::LargestUInt value = 0; + while (current < token.end_) { + Char c = *current++; + if (c < '0' || c > '9') + return decodeDouble(token, decoded); + auto digit(static_cast<Value::UInt>(c - '0')); + if (value >= threshold) { + // We've hit or exceeded the max value divided by 10 (rounded down). If + // a) we've only just touched the limit, b) this is the last digit, and + // c) it's small enough to fit in that rounding delta, we're okay. + // Otherwise treat this number as a double to avoid overflow. + if (value > threshold || current != token.end_ || + digit > maxIntegerValue % 10) { + return decodeDouble(token, decoded); + } + } + value = value * 10 + digit; + } + if (isNegative && value == maxIntegerValue) + decoded = Value::minLargestInt; + else if (isNegative) + decoded = -Value::LargestInt(value); + else if (value <= Value::LargestUInt(Value::maxInt)) + decoded = Value::LargestInt(value); + else + decoded = value; + return true; +} + +bool Reader::decodeDouble(Token& token) { + Value decoded; + if (!decodeDouble(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool Reader::decodeDouble(Token& token, Value& decoded) { + double value = 0; + String buffer(token.start_, token.end_); + IStringStream is(buffer); + if (!(is >> value)) + return addError( + "'" + String(token.start_, token.end_) + "' is not a number.", token); + decoded = value; + return true; +} + +bool Reader::decodeString(Token& token) { + String decoded_string; + if (!decodeString(token, decoded_string)) + return false; + Value decoded(decoded_string); + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool Reader::decodeString(Token& token, String& decoded) { + decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2)); + Location current = token.start_ + 1; // skip '"' + Location end = token.end_ - 1; // do not include '"' + while (current != end) { + Char c = *current++; + if (c == '"') + break; + if (c == '\\') { + if (current == end) + return addError("Empty escape sequence in string", token, current); + Char escape = *current++; + switch (escape) { + case '"': + decoded += '"'; + break; + case '/': + decoded += '/'; + break; + case '\\': + decoded += '\\'; + break; + case 'b': + decoded += '\b'; + break; + case 'f': + decoded += '\f'; + break; + case 'n': + decoded += '\n'; + break; + case 'r': + decoded += '\r'; + break; + case 't': + decoded += '\t'; + break; + case 'u': { + unsigned int unicode; + if (!decodeUnicodeCodePoint(token, current, end, unicode)) + return false; + decoded += codePointToUTF8(unicode); + } break; + default: + return addError("Bad escape sequence in string", token, current); + } + } else { + decoded += c; + } + } + return true; +} + +bool Reader::decodeUnicodeCodePoint(Token& token, Location& current, + Location end, unsigned int& unicode) { + + if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) + return false; + if (unicode >= 0xD800 && unicode <= 0xDBFF) { + // surrogate pairs + if (end - current < 6) + return addError( + "additional six characters expected to parse unicode surrogate pair.", + token, current); + if (*(current++) == '\\' && *(current++) == 'u') { + unsigned int surrogatePair; + if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { + unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); + } else + return false; + } else + return addError("expecting another \\u token to begin the second half of " + "a unicode surrogate pair", + token, current); + } + return true; +} + +bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current, + Location end, + unsigned int& ret_unicode) { + if (end - current < 4) + return addError( + "Bad unicode escape sequence in string: four digits expected.", token, + current); + int unicode = 0; + for (int index = 0; index < 4; ++index) { + Char c = *current++; + unicode *= 16; + if (c >= '0' && c <= '9') + unicode += c - '0'; + else if (c >= 'a' && c <= 'f') + unicode += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + unicode += c - 'A' + 10; + else + return addError( + "Bad unicode escape sequence in string: hexadecimal digit expected.", + token, current); + } + ret_unicode = static_cast<unsigned int>(unicode); + return true; +} + +bool Reader::addError(const String& message, Token& token, Location extra) { + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = extra; + errors_.push_back(info); + return false; +} + +bool Reader::recoverFromError(TokenType skipUntilToken) { + size_t const errorCount = errors_.size(); + Token skip; + for (;;) { + if (!readToken(skip)) + errors_.resize(errorCount); // discard errors caused by recovery + if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream) + break; + } + errors_.resize(errorCount); + return false; +} + +bool Reader::addErrorAndRecover(const String& message, Token& token, + TokenType skipUntilToken) { + addError(message, token); + return recoverFromError(skipUntilToken); +} + +Value& Reader::currentValue() { return *(nodes_.top()); } + +Reader::Char Reader::getNextChar() { + if (current_ == end_) + return 0; + return *current_++; +} + +void Reader::getLocationLineAndColumn(Location location, int& line, + int& column) const { + Location current = begin_; + Location lastLineStart = current; + line = 0; + while (current < location && current != end_) { + Char c = *current++; + if (c == '\r') { + if (*current == '\n') + ++current; + lastLineStart = current; + ++line; + } else if (c == '\n') { + lastLineStart = current; + ++line; + } + } + // column & line start at 1 + column = int(location - lastLineStart) + 1; + ++line; +} + +String Reader::getLocationLineAndColumn(Location location) const { + int line, column; + getLocationLineAndColumn(location, line, column); + char buffer[18 + 16 + 16 + 1]; + jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); + return buffer; +} + +// Deprecated. Preserved for backward compatibility +String Reader::getFormatedErrorMessages() const { + return getFormattedErrorMessages(); +} + +String Reader::getFormattedErrorMessages() const { + String formattedMessage; + for (const auto& error : errors_) { + formattedMessage += + "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; + formattedMessage += " " + error.message_ + "\n"; + if (error.extra_) + formattedMessage += + "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n"; + } + return formattedMessage; +} + +std::vector<Reader::StructuredError> Reader::getStructuredErrors() const { + std::vector<Reader::StructuredError> allErrors; + for (const auto& error : errors_) { + Reader::StructuredError structured; + structured.offset_start = error.token_.start_ - begin_; + structured.offset_limit = error.token_.end_ - begin_; + structured.message = error.message_; + allErrors.push_back(structured); + } + return allErrors; +} + +bool Reader::pushError(const Value& value, const String& message) { + ptrdiff_t const length = end_ - begin_; + if (value.getOffsetStart() > length || value.getOffsetLimit() > length) + return false; + Token token; + token.type_ = tokenError; + token.start_ = begin_ + value.getOffsetStart(); + token.end_ = begin_ + value.getOffsetLimit(); + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = nullptr; + errors_.push_back(info); + return true; +} + +bool Reader::pushError(const Value& value, const String& message, + const Value& extra) { + ptrdiff_t const length = end_ - begin_; + if (value.getOffsetStart() > length || value.getOffsetLimit() > length || + extra.getOffsetLimit() > length) + return false; + Token token; + token.type_ = tokenError; + token.start_ = begin_ + value.getOffsetStart(); + token.end_ = begin_ + value.getOffsetLimit(); + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = begin_ + extra.getOffsetStart(); + errors_.push_back(info); + return true; +} + +bool Reader::good() const { return errors_.empty(); } + +// Originally copied from the Features class (now deprecated), used internally +// for features implementation. +class OurFeatures { +public: + static OurFeatures all(); + bool allowComments_; + bool allowTrailingCommas_; + bool strictRoot_; + bool allowDroppedNullPlaceholders_; + bool allowNumericKeys_; + bool allowSingleQuotes_; + bool failIfExtra_; + bool rejectDupKeys_; + bool allowSpecialFloats_; + bool skipBom_; + size_t stackLimit_; +}; // OurFeatures + +OurFeatures OurFeatures::all() { return {}; } + +// Implementation of class Reader +// //////////////////////////////// + +// Originally copied from the Reader class (now deprecated), used internally +// for implementing JSON reading. +class OurReader { +public: + using Char = char; + using Location = const Char*; + struct StructuredError { + ptrdiff_t offset_start; + ptrdiff_t offset_limit; + String message; + }; + + explicit OurReader(OurFeatures const& features); + bool parse(const char* beginDoc, const char* endDoc, Value& root, + bool collectComments = true); + String getFormattedErrorMessages() const; + std::vector<StructuredError> getStructuredErrors() const; + +private: + OurReader(OurReader const&); // no impl + void operator=(OurReader const&); // no impl + + enum TokenType { + tokenEndOfStream = 0, + tokenObjectBegin, + tokenObjectEnd, + tokenArrayBegin, + tokenArrayEnd, + tokenString, + tokenNumber, + tokenTrue, + tokenFalse, + tokenNull, + tokenNaN, + tokenPosInf, + tokenNegInf, + tokenArraySeparator, + tokenMemberSeparator, + tokenComment, + tokenError + }; + + class Token { + public: + TokenType type_; + Location start_; + Location end_; + }; + + class ErrorInfo { + public: + Token token_; + String message_; + Location extra_; + }; + + using Errors = std::deque<ErrorInfo>; + + bool readToken(Token& token); + void skipSpaces(); + void skipBom(bool skipBom); + bool match(const Char* pattern, int patternLength); + bool readComment(); + bool readCStyleComment(bool* containsNewLineResult); + bool readCppStyleComment(); + bool readString(); + bool readStringSingleQuote(); + bool readNumber(bool checkInf); + bool readValue(); + bool readObject(Token& token); + bool readArray(Token& token); + bool decodeNumber(Token& token); + bool decodeNumber(Token& token, Value& decoded); + bool decodeString(Token& token); + bool decodeString(Token& token, String& decoded); + bool decodeDouble(Token& token); + bool decodeDouble(Token& token, Value& decoded); + bool decodeUnicodeCodePoint(Token& token, Location& current, Location end, + unsigned int& unicode); + bool decodeUnicodeEscapeSequence(Token& token, Location& current, + Location end, unsigned int& unicode); + bool addError(const String& message, Token& token, Location extra = nullptr); + bool recoverFromError(TokenType skipUntilToken); + bool addErrorAndRecover(const String& message, Token& token, + TokenType skipUntilToken); + void skipUntilSpace(); + Value& currentValue(); + Char getNextChar(); + void getLocationLineAndColumn(Location location, int& line, + int& column) const; + String getLocationLineAndColumn(Location location) const; + void addComment(Location begin, Location end, CommentPlacement placement); + void skipCommentTokens(Token& token); + + static String normalizeEOL(Location begin, Location end); + static bool containsNewLine(Location begin, Location end); + + using Nodes = std::stack<Value*>; + + Nodes nodes_{}; + Errors errors_{}; + String document_{}; + Location begin_ = nullptr; + Location end_ = nullptr; + Location current_ = nullptr; + Location lastValueEnd_ = nullptr; + Value* lastValue_ = nullptr; + bool lastValueHasAComment_ = false; + String commentsBefore_{}; + + OurFeatures const features_; + bool collectComments_ = false; +}; // OurReader + +// complete copy of Read impl, for OurReader + +bool OurReader::containsNewLine(OurReader::Location begin, + OurReader::Location end) { + return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; }); +} + +OurReader::OurReader(OurFeatures const& features) : features_(features) {} + +bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root, + bool collectComments) { + if (!features_.allowComments_) { + collectComments = false; + } + + begin_ = beginDoc; + end_ = endDoc; + collectComments_ = collectComments; + current_ = begin_; + lastValueEnd_ = nullptr; + lastValue_ = nullptr; + commentsBefore_.clear(); + errors_.clear(); + while (!nodes_.empty()) + nodes_.pop(); + nodes_.push(&root); + + // skip byte order mark if it exists at the beginning of the UTF-8 text. + skipBom(features_.skipBom_); + bool successful = readValue(); + nodes_.pop(); + Token token; + skipCommentTokens(token); + if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) { + addError("Extra non-whitespace after JSON value.", token); + return false; + } + if (collectComments_ && !commentsBefore_.empty()) + root.setComment(commentsBefore_, commentAfter); + if (features_.strictRoot_) { + if (!root.isArray() && !root.isObject()) { + // Set error location to start of doc, ideally should be first token found + // in doc + token.type_ = tokenError; + token.start_ = beginDoc; + token.end_ = endDoc; + addError( + "A valid JSON document must be either an array or an object value.", + token); + return false; + } + } + return successful; +} + +bool OurReader::readValue() { + // To preserve the old behaviour we cast size_t to int. + if (nodes_.size() > features_.stackLimit_) + throwRuntimeError("Exceeded stackLimit in readValue()."); + Token token; + skipCommentTokens(token); + bool successful = true; + + if (collectComments_ && !commentsBefore_.empty()) { + currentValue().setComment(commentsBefore_, commentBefore); + commentsBefore_.clear(); + } + + switch (token.type_) { + case tokenObjectBegin: + successful = readObject(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenArrayBegin: + successful = readArray(token); + currentValue().setOffsetLimit(current_ - begin_); + break; + case tokenNumber: + successful = decodeNumber(token); + break; + case tokenString: + successful = decodeString(token); + break; + case tokenTrue: { + Value v(true); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenFalse: { + Value v(false); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenNull: { + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenNaN: { + Value v(std::numeric_limits<double>::quiet_NaN()); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenPosInf: { + Value v(std::numeric_limits<double>::infinity()); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenNegInf: { + Value v(-std::numeric_limits<double>::infinity()); + currentValue().swapPayload(v); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + } break; + case tokenArraySeparator: + case tokenObjectEnd: + case tokenArrayEnd: + if (features_.allowDroppedNullPlaceholders_) { + // "Un-read" the current token and mark the current value as a null + // token. + current_--; + Value v; + currentValue().swapPayload(v); + currentValue().setOffsetStart(current_ - begin_ - 1); + currentValue().setOffsetLimit(current_ - begin_); + break; + } // else, fall through ... + default: + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return addError("Syntax error: value, object or array expected.", token); + } + + if (collectComments_) { + lastValueEnd_ = current_; + lastValueHasAComment_ = false; + lastValue_ = ¤tValue(); + } + + return successful; +} + +void OurReader::skipCommentTokens(Token& token) { + if (features_.allowComments_) { + do { + readToken(token); + } while (token.type_ == tokenComment); + } else { + readToken(token); + } +} + +bool OurReader::readToken(Token& token) { + skipSpaces(); + token.start_ = current_; + Char c = getNextChar(); + bool ok = true; + switch (c) { + case '{': + token.type_ = tokenObjectBegin; + break; + case '}': + token.type_ = tokenObjectEnd; + break; + case '[': + token.type_ = tokenArrayBegin; + break; + case ']': + token.type_ = tokenArrayEnd; + break; + case '"': + token.type_ = tokenString; + ok = readString(); + break; + case '\'': + if (features_.allowSingleQuotes_) { + token.type_ = tokenString; + ok = readStringSingleQuote(); + } else { + // If we don't allow single quotes, this is a failure case. + ok = false; + } + break; + case '/': + token.type_ = tokenComment; + ok = readComment(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + token.type_ = tokenNumber; + readNumber(false); + break; + case '-': + if (readNumber(true)) { + token.type_ = tokenNumber; + } else { + token.type_ = tokenNegInf; + ok = features_.allowSpecialFloats_ && match("nfinity", 7); + } + break; + case '+': + if (readNumber(true)) { + token.type_ = tokenNumber; + } else { + token.type_ = tokenPosInf; + ok = features_.allowSpecialFloats_ && match("nfinity", 7); + } + break; + case 't': + token.type_ = tokenTrue; + ok = match("rue", 3); + break; + case 'f': + token.type_ = tokenFalse; + ok = match("alse", 4); + break; + case 'n': + token.type_ = tokenNull; + ok = match("ull", 3); + break; + case 'N': + if (features_.allowSpecialFloats_) { + token.type_ = tokenNaN; + ok = match("aN", 2); + } else { + ok = false; + } + break; + case 'I': + if (features_.allowSpecialFloats_) { + token.type_ = tokenPosInf; + ok = match("nfinity", 7); + } else { + ok = false; + } + break; + case ',': + token.type_ = tokenArraySeparator; + break; + case ':': + token.type_ = tokenMemberSeparator; + break; + case 0: + token.type_ = tokenEndOfStream; + break; + default: + ok = false; + break; + } + if (!ok) + token.type_ = tokenError; + token.end_ = current_; + return ok; +} + +void OurReader::skipSpaces() { + while (current_ != end_) { + Char c = *current_; + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') + ++current_; + else + break; + } +} + +void OurReader::skipBom(bool skipBom) { + // The default behavior is to skip BOM. + if (skipBom) { + if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) { + begin_ += 3; + current_ = begin_; + } + } +} + +bool OurReader::match(const Char* pattern, int patternLength) { + if (end_ - current_ < patternLength) + return false; + int index = patternLength; + while (index--) + if (current_[index] != pattern[index]) + return false; + current_ += patternLength; + return true; +} + +bool OurReader::readComment() { + const Location commentBegin = current_ - 1; + const Char c = getNextChar(); + bool successful = false; + bool cStyleWithEmbeddedNewline = false; + + const bool isCStyleComment = (c == '*'); + const bool isCppStyleComment = (c == '/'); + if (isCStyleComment) { + successful = readCStyleComment(&cStyleWithEmbeddedNewline); + } else if (isCppStyleComment) { + successful = readCppStyleComment(); + } + + if (!successful) + return false; + + if (collectComments_) { + CommentPlacement placement = commentBefore; + + if (!lastValueHasAComment_) { + if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) { + if (isCppStyleComment || !cStyleWithEmbeddedNewline) { + placement = commentAfterOnSameLine; + lastValueHasAComment_ = true; + } + } + } + + addComment(commentBegin, current_, placement); + } + return true; +} + +String OurReader::normalizeEOL(OurReader::Location begin, + OurReader::Location end) { + String normalized; + normalized.reserve(static_cast<size_t>(end - begin)); + OurReader::Location current = begin; + while (current != end) { + char c = *current++; + if (c == '\r') { + if (current != end && *current == '\n') + // convert dos EOL + ++current; + // convert Mac EOL + normalized += '\n'; + } else { + normalized += c; + } + } + return normalized; +} + +void OurReader::addComment(Location begin, Location end, + CommentPlacement placement) { + assert(collectComments_); + const String& normalized = normalizeEOL(begin, end); + if (placement == commentAfterOnSameLine) { + assert(lastValue_ != nullptr); + lastValue_->setComment(normalized, placement); + } else { + commentsBefore_ += normalized; + } +} + +bool OurReader::readCStyleComment(bool* containsNewLineResult) { + *containsNewLineResult = false; + + while ((current_ + 1) < end_) { + Char c = getNextChar(); + if (c == '*' && *current_ == '/') + break; + if (c == '\n') + *containsNewLineResult = true; + } + + return getNextChar() == '/'; +} + +bool OurReader::readCppStyleComment() { + while (current_ != end_) { + Char c = getNextChar(); + if (c == '\n') + break; + if (c == '\r') { + // Consume DOS EOL. It will be normalized in addComment. + if (current_ != end_ && *current_ == '\n') + getNextChar(); + // Break on Moc OS 9 EOL. + break; + } + } + return true; +} + +bool OurReader::readNumber(bool checkInf) { + Location p = current_; + if (checkInf && p != end_ && *p == 'I') { + current_ = ++p; + return false; + } + char c = '0'; // stopgap for already consumed character + // integral part + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + // fractional part + if (c == '.') { + c = (current_ = p) < end_ ? *p++ : '\0'; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + } + // exponential part + if (c == 'e' || c == 'E') { + c = (current_ = p) < end_ ? *p++ : '\0'; + if (c == '+' || c == '-') + c = (current_ = p) < end_ ? *p++ : '\0'; + while (c >= '0' && c <= '9') + c = (current_ = p) < end_ ? *p++ : '\0'; + } + return true; +} +bool OurReader::readString() { + Char c = 0; + while (current_ != end_) { + c = getNextChar(); + if (c == '\\') + getNextChar(); + else if (c == '"') + break; + } + return c == '"'; +} + +bool OurReader::readStringSingleQuote() { + Char c = 0; + while (current_ != end_) { + c = getNextChar(); + if (c == '\\') + getNextChar(); + else if (c == '\'') + break; + } + return c == '\''; +} + +bool OurReader::readObject(Token& token) { + Token tokenName; + String name; + Value init(objectValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(token.start_ - begin_); + while (readToken(tokenName)) { + bool initialTokenOk = true; + while (tokenName.type_ == tokenComment && initialTokenOk) + initialTokenOk = readToken(tokenName); + if (!initialTokenOk) + break; + if (tokenName.type_ == tokenObjectEnd && + (name.empty() || + features_.allowTrailingCommas_)) // empty object or trailing comma + return true; + name.clear(); + if (tokenName.type_ == tokenString) { + if (!decodeString(tokenName, name)) + return recoverFromError(tokenObjectEnd); + } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) { + Value numberName; + if (!decodeNumber(tokenName, numberName)) + return recoverFromError(tokenObjectEnd); + name = numberName.asString(); + } else { + break; + } + if (name.length() >= (1U << 30)) + throwRuntimeError("keylength >= 2^30"); + if (features_.rejectDupKeys_ && currentValue().isMember(name)) { + String msg = "Duplicate key: '" + name + "'"; + return addErrorAndRecover(msg, tokenName, tokenObjectEnd); + } + + Token colon; + if (!readToken(colon) || colon.type_ != tokenMemberSeparator) { + return addErrorAndRecover("Missing ':' after object member name", colon, + tokenObjectEnd); + } + Value& value = currentValue()[name]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenObjectEnd); + + Token comma; + if (!readToken(comma) || + (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && + comma.type_ != tokenComment)) { + return addErrorAndRecover("Missing ',' or '}' in object declaration", + comma, tokenObjectEnd); + } + bool finalizeTokenOk = true; + while (comma.type_ == tokenComment && finalizeTokenOk) + finalizeTokenOk = readToken(comma); + if (comma.type_ == tokenObjectEnd) + return true; + } + return addErrorAndRecover("Missing '}' or object member name", tokenName, + tokenObjectEnd); +} + +bool OurReader::readArray(Token& token) { + Value init(arrayValue); + currentValue().swapPayload(init); + currentValue().setOffsetStart(token.start_ - begin_); + int index = 0; + for (;;) { + skipSpaces(); + if (current_ != end_ && *current_ == ']' && + (index == 0 || + (features_.allowTrailingCommas_ && + !features_.allowDroppedNullPlaceholders_))) // empty array or trailing + // comma + { + Token endArray; + readToken(endArray); + return true; + } + Value& value = currentValue()[index++]; + nodes_.push(&value); + bool ok = readValue(); + nodes_.pop(); + if (!ok) // error already set + return recoverFromError(tokenArrayEnd); + + Token currentToken; + // Accept Comment after last item in the array. + ok = readToken(currentToken); + while (currentToken.type_ == tokenComment && ok) { + ok = readToken(currentToken); + } + bool badTokenType = (currentToken.type_ != tokenArraySeparator && + currentToken.type_ != tokenArrayEnd); + if (!ok || badTokenType) { + return addErrorAndRecover("Missing ',' or ']' in array declaration", + currentToken, tokenArrayEnd); + } + if (currentToken.type_ == tokenArrayEnd) + break; + } + return true; +} + +bool OurReader::decodeNumber(Token& token) { + Value decoded; + if (!decodeNumber(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OurReader::decodeNumber(Token& token, Value& decoded) { + // Attempts to parse the number as an integer. If the number is + // larger than the maximum supported value of an integer then + // we decode the number as a double. + Location current = token.start_; + const bool isNegative = *current == '-'; + if (isNegative) { + ++current; + } + + // We assume we can represent the largest and smallest integer types as + // unsigned integers with separate sign. This is only true if they can fit + // into an unsigned integer. + static_assert(Value::maxLargestInt <= Value::maxLargestUInt, + "Int must be smaller than UInt"); + + // We need to convert minLargestInt into a positive number. The easiest way + // to do this conversion is to assume our "threshold" value of minLargestInt + // divided by 10 can fit in maxLargestInt when absolute valued. This should + // be a safe assumption. + static_assert(Value::minLargestInt <= -Value::maxLargestInt, + "The absolute value of minLargestInt must be greater than or " + "equal to maxLargestInt"); + static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt, + "The absolute value of minLargestInt must be only 1 magnitude " + "larger than maxLargest Int"); + + static constexpr Value::LargestUInt positive_threshold = + Value::maxLargestUInt / 10; + static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10; + + // For the negative values, we have to be more careful. Since typically + // -Value::minLargestInt will cause an overflow, we first divide by 10 and + // then take the inverse. This assumes that minLargestInt is only a single + // power of 10 different in magnitude, which we check above. For the last + // digit, we take the modulus before negating for the same reason. + static constexpr auto negative_threshold = + Value::LargestUInt(-(Value::minLargestInt / 10)); + static constexpr auto negative_last_digit = + Value::UInt(-(Value::minLargestInt % 10)); + + const Value::LargestUInt threshold = + isNegative ? negative_threshold : positive_threshold; + const Value::UInt max_last_digit = + isNegative ? negative_last_digit : positive_last_digit; + + Value::LargestUInt value = 0; + while (current < token.end_) { + Char c = *current++; + if (c < '0' || c > '9') + return decodeDouble(token, decoded); + + const auto digit(static_cast<Value::UInt>(c - '0')); + if (value >= threshold) { + // We've hit or exceeded the max value divided by 10 (rounded down). If + // a) we've only just touched the limit, meaing value == threshold, + // b) this is the last digit, or + // c) it's small enough to fit in that rounding delta, we're okay. + // Otherwise treat this number as a double to avoid overflow. + if (value > threshold || current != token.end_ || + digit > max_last_digit) { + return decodeDouble(token, decoded); + } + } + value = value * 10 + digit; + } + + if (isNegative) { + // We use the same magnitude assumption here, just in case. + const auto last_digit = static_cast<Value::UInt>(value % 10); + decoded = -Value::LargestInt(value / 10) * 10 - last_digit; + } else if (value <= Value::LargestUInt(Value::maxLargestInt)) { + decoded = Value::LargestInt(value); + } else { + decoded = value; + } + + return true; +} + +bool OurReader::decodeDouble(Token& token) { + Value decoded; + if (!decodeDouble(token, decoded)) + return false; + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OurReader::decodeDouble(Token& token, Value& decoded) { + double value = 0; + const String buffer(token.start_, token.end_); + IStringStream is(buffer); + if (!(is >> value)) { + return addError( + "'" + String(token.start_, token.end_) + "' is not a number.", token); + } + decoded = value; + return true; +} + +bool OurReader::decodeString(Token& token) { + String decoded_string; + if (!decodeString(token, decoded_string)) + return false; + Value decoded(decoded_string); + currentValue().swapPayload(decoded); + currentValue().setOffsetStart(token.start_ - begin_); + currentValue().setOffsetLimit(token.end_ - begin_); + return true; +} + +bool OurReader::decodeString(Token& token, String& decoded) { + decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2)); + Location current = token.start_ + 1; // skip '"' + Location end = token.end_ - 1; // do not include '"' + while (current != end) { + Char c = *current++; + if (c == '"') + break; + if (c == '\\') { + if (current == end) + return addError("Empty escape sequence in string", token, current); + Char escape = *current++; + switch (escape) { + case '"': + decoded += '"'; + break; + case '/': + decoded += '/'; + break; + case '\\': + decoded += '\\'; + break; + case 'b': + decoded += '\b'; + break; + case 'f': + decoded += '\f'; + break; + case 'n': + decoded += '\n'; + break; + case 'r': + decoded += '\r'; + break; + case 't': + decoded += '\t'; + break; + case 'u': { + unsigned int unicode; + if (!decodeUnicodeCodePoint(token, current, end, unicode)) + return false; + decoded += codePointToUTF8(unicode); + } break; + default: + return addError("Bad escape sequence in string", token, current); + } + } else { + decoded += c; + } + } + return true; +} + +bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current, + Location end, unsigned int& unicode) { + + if (!decodeUnicodeEscapeSequence(token, current, end, unicode)) + return false; + if (unicode >= 0xD800 && unicode <= 0xDBFF) { + // surrogate pairs + if (end - current < 6) + return addError( + "additional six characters expected to parse unicode surrogate pair.", + token, current); + if (*(current++) == '\\' && *(current++) == 'u') { + unsigned int surrogatePair; + if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) { + unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); + } else + return false; + } else + return addError("expecting another \\u token to begin the second half of " + "a unicode surrogate pair", + token, current); + } + return true; +} + +bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current, + Location end, + unsigned int& ret_unicode) { + if (end - current < 4) + return addError( + "Bad unicode escape sequence in string: four digits expected.", token, + current); + int unicode = 0; + for (int index = 0; index < 4; ++index) { + Char c = *current++; + unicode *= 16; + if (c >= '0' && c <= '9') + unicode += c - '0'; + else if (c >= 'a' && c <= 'f') + unicode += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + unicode += c - 'A' + 10; + else + return addError( + "Bad unicode escape sequence in string: hexadecimal digit expected.", + token, current); + } + ret_unicode = static_cast<unsigned int>(unicode); + return true; +} + +bool OurReader::addError(const String& message, Token& token, Location extra) { + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = extra; + errors_.push_back(info); + return false; +} + +bool OurReader::recoverFromError(TokenType skipUntilToken) { + size_t errorCount = errors_.size(); + Token skip; + for (;;) { + if (!readToken(skip)) + errors_.resize(errorCount); // discard errors caused by recovery + if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream) + break; + } + errors_.resize(errorCount); + return false; +} + +bool OurReader::addErrorAndRecover(const String& message, Token& token, + TokenType skipUntilToken) { + addError(message, token); + return recoverFromError(skipUntilToken); +} + +Value& OurReader::currentValue() { return *(nodes_.top()); } + +OurReader::Char OurReader::getNextChar() { + if (current_ == end_) + return 0; + return *current_++; +} + +void OurReader::getLocationLineAndColumn(Location location, int& line, + int& column) const { + Location current = begin_; + Location lastLineStart = current; + line = 0; + while (current < location && current != end_) { + Char c = *current++; + if (c == '\r') { + if (*current == '\n') + ++current; + lastLineStart = current; + ++line; + } else if (c == '\n') { + lastLineStart = current; + ++line; + } + } + // column & line start at 1 + column = int(location - lastLineStart) + 1; + ++line; +} + +String OurReader::getLocationLineAndColumn(Location location) const { + int line, column; + getLocationLineAndColumn(location, line, column); + char buffer[18 + 16 + 16 + 1]; + jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column); + return buffer; +} + +String OurReader::getFormattedErrorMessages() const { + String formattedMessage; + for (const auto& error : errors_) { + formattedMessage += + "* " + getLocationLineAndColumn(error.token_.start_) + "\n"; + formattedMessage += " " + error.message_ + "\n"; + if (error.extra_) + formattedMessage += + "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n"; + } + return formattedMessage; +} + +std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const { + std::vector<OurReader::StructuredError> allErrors; + for (const auto& error : errors_) { + OurReader::StructuredError structured; + structured.offset_start = error.token_.start_ - begin_; + structured.offset_limit = error.token_.end_ - begin_; + structured.message = error.message_; + allErrors.push_back(structured); + } + return allErrors; +} + +class OurCharReader : public CharReader { + bool const collectComments_; + OurReader reader_; + +public: + OurCharReader(bool collectComments, OurFeatures const& features) + : collectComments_(collectComments), reader_(features) {} + bool parse(char const* beginDoc, char const* endDoc, Value* root, + String* errs) override { + bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_); + if (errs) { + *errs = reader_.getFormattedErrorMessages(); + } + return ok; + } +}; + +CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); } +CharReaderBuilder::~CharReaderBuilder() = default; +CharReader* CharReaderBuilder::newCharReader() const { + bool collectComments = settings_["collectComments"].asBool(); + OurFeatures features = OurFeatures::all(); + features.allowComments_ = settings_["allowComments"].asBool(); + features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool(); + features.strictRoot_ = settings_["strictRoot"].asBool(); + features.allowDroppedNullPlaceholders_ = + settings_["allowDroppedNullPlaceholders"].asBool(); + features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool(); + features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool(); + + // Stack limit is always a size_t, so we get this as an unsigned int + // regardless of it we have 64-bit integer support enabled. + features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt()); + features.failIfExtra_ = settings_["failIfExtra"].asBool(); + features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool(); + features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool(); + features.skipBom_ = settings_["skipBom"].asBool(); + return new OurCharReader(collectComments, features); +} + +bool CharReaderBuilder::validate(Json::Value* invalid) const { + static const auto& valid_keys = *new std::set<String>{ + "collectComments", + "allowComments", + "allowTrailingCommas", + "strictRoot", + "allowDroppedNullPlaceholders", + "allowNumericKeys", + "allowSingleQuotes", + "stackLimit", + "failIfExtra", + "rejectDupKeys", + "allowSpecialFloats", + "skipBom", + }; + for (auto si = settings_.begin(); si != settings_.end(); ++si) { + auto key = si.name(); + if (valid_keys.count(key)) + continue; + if (invalid) + (*invalid)[key] = *si; + else + return false; + } + return invalid ? invalid->empty() : true; +} + +Value& CharReaderBuilder::operator[](const String& key) { + return settings_[key]; +} +// static +void CharReaderBuilder::strictMode(Json::Value* settings) { + //! [CharReaderBuilderStrictMode] + (*settings)["allowComments"] = false; + (*settings)["allowTrailingCommas"] = false; + (*settings)["strictRoot"] = true; + (*settings)["allowDroppedNullPlaceholders"] = false; + (*settings)["allowNumericKeys"] = false; + (*settings)["allowSingleQuotes"] = false; + (*settings)["stackLimit"] = 1000; + (*settings)["failIfExtra"] = true; + (*settings)["rejectDupKeys"] = true; + (*settings)["allowSpecialFloats"] = false; + (*settings)["skipBom"] = true; + //! [CharReaderBuilderStrictMode] +} +// static +void CharReaderBuilder::setDefaults(Json::Value* settings) { + //! [CharReaderBuilderDefaults] + (*settings)["collectComments"] = true; + (*settings)["allowComments"] = true; + (*settings)["allowTrailingCommas"] = true; + (*settings)["strictRoot"] = false; + (*settings)["allowDroppedNullPlaceholders"] = false; + (*settings)["allowNumericKeys"] = false; + (*settings)["allowSingleQuotes"] = false; + (*settings)["stackLimit"] = 1000; + (*settings)["failIfExtra"] = false; + (*settings)["rejectDupKeys"] = false; + (*settings)["allowSpecialFloats"] = false; + (*settings)["skipBom"] = true; + //! [CharReaderBuilderDefaults] +} + +////////////////////////////////// +// global functions + +bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root, + String* errs) { + OStringStream ssin; + ssin << sin.rdbuf(); + String doc = ssin.str(); + char const* begin = doc.data(); + char const* end = begin + doc.size(); + // Note that we do not actually need a null-terminator. + CharReaderPtr const reader(fact.newCharReader()); + return reader->parse(begin, end, root, errs); +} + +IStream& operator>>(IStream& sin, Value& root) { + CharReaderBuilder b; + String errs; + bool ok = parseFromStream(b, sin, &root, &errs); + if (!ok) { + throwRuntimeError(errs); + } + return sin; +} + +} // namespace Json |