123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873 | // Copyright (C) 2020 The Qt Company Ltd.// Copyright (C) 2021 Intel Corporation.// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only#ifndef QT_BOOTSTRAPPED#include <qcoreapplication.h>#endif#include <qdebug.h>#include"qjsonparser_p.h"#include"qjson_p.h"#include"private/qstringconverter_p.h"#include"private/qcborvalue_p.h"#include"private/qnumeric_p.h"#include <private/qtools_p.h>static const int nestingLimit =1024; QT_BEGIN_NAMESPACE using namespace QtMiscUtils;// error strings for the JSON parser#define JSONERR_OK QT_TRANSLATE_NOOP("QJsonParseError","no error occurred")#define JSONERR_UNTERM_OBJ QT_TRANSLATE_NOOP("QJsonParseError","unterminated object")#define JSONERR_MISS_NSEP QT_TRANSLATE_NOOP("QJsonParseError","missing name separator")#define JSONERR_UNTERM_AR QT_TRANSLATE_NOOP("QJsonParseError","unterminated array")#define JSONERR_MISS_VSEP QT_TRANSLATE_NOOP("QJsonParseError","missing value separator")#define JSONERR_ILLEGAL_VAL QT_TRANSLATE_NOOP("QJsonParseError","illegal value")#define JSONERR_END_OF_NUM QT_TRANSLATE_NOOP("QJsonParseError","invalid termination by number")#define JSONERR_ILLEGAL_NUM QT_TRANSLATE_NOOP("QJsonParseError","illegal number")#define JSONERR_STR_ESC_SEQ QT_TRANSLATE_NOOP("QJsonParseError","invalid escape sequence")#define JSONERR_STR_UTF8 QT_TRANSLATE_NOOP("QJsonParseError","invalid UTF8 string")#define JSONERR_UTERM_STR QT_TRANSLATE_NOOP("QJsonParseError","unterminated string")#define JSONERR_MISS_OBJ QT_TRANSLATE_NOOP("QJsonParseError","object is missing after a comma")#define JSONERR_DEEP_NEST QT_TRANSLATE_NOOP("QJsonParseError","too deeply nested document")#define JSONERR_DOC_LARGE QT_TRANSLATE_NOOP("QJsonParseError","too large document")#define JSONERR_GARBAGEEND QT_TRANSLATE_NOOP("QJsonParseError","garbage at the end of the document")/*! \class QJsonParseError \inmodule QtCore \ingroup json \ingroup shared \ingroup qtserialization \reentrant \since 5.0 \brief The QJsonParseError class is used to report errors during JSON parsing. \sa {JSON Support in Qt}, {Saving and Loading a Game}*//*! \enum QJsonParseError::ParseError This enum describes the type of error that occurred during the parsing of a JSON document. \value NoError No error occurred \value UnterminatedObject An object is not correctly terminated with a closing curly bracket \value MissingNameSeparator A comma separating different items is missing \value UnterminatedArray The array is not correctly terminated with a closing square bracket \value MissingValueSeparator A colon separating keys from values inside objects is missing \value IllegalValue The value is illegal \value TerminationByNumber The input stream ended while parsing a number (as of 6.9, this is no longer returned) \value IllegalNumber The number is not well formed \value IllegalEscapeSequence An illegal escape sequence occurred in the input \value IllegalUTF8String An illegal UTF8 sequence occurred in the input \value UnterminatedString A string wasn't terminated with a quote \value MissingObject An object was expected but couldn't be found \value DeepNesting The JSON document is too deeply nested for the parser to parse it \value DocumentTooLarge The JSON document is too large for the parser to parse it \value GarbageAtEnd The parsed document contains additional garbage characters at the end*//*! \variable QJsonParseError::error Contains the type of the parse error. Is equal to QJsonParseError::NoError if the document was parsed correctly. \sa ParseError, errorString()*//*! \variable QJsonParseError::offset Contains the byte offset in the UTF-8 byte array where the parse error occurred. \sa error, errorString(), QJsonDocument::fromJson()*//*! Returns the human-readable message appropriate to the reported JSON parsing error. \sa error */ QString QJsonParseError::errorString()const{const char*sz ="";switch(error) {case NoError: sz = JSONERR_OK;break;case UnterminatedObject: sz = JSONERR_UNTERM_OBJ;break;case MissingNameSeparator: sz = JSONERR_MISS_NSEP;break;case UnterminatedArray: sz = JSONERR_UNTERM_AR;break;case MissingValueSeparator: sz = JSONERR_MISS_VSEP;break;case IllegalValue: sz = JSONERR_ILLEGAL_VAL;break;case TerminationByNumber: sz = JSONERR_END_OF_NUM;break;case IllegalNumber: sz = JSONERR_ILLEGAL_NUM;break;case IllegalEscapeSequence: sz = JSONERR_STR_ESC_SEQ;break;case IllegalUTF8String: sz = JSONERR_STR_UTF8;break;case UnterminatedString: sz = JSONERR_UTERM_STR;break;case MissingObject: sz = JSONERR_MISS_OBJ;break;case DeepNesting: sz = JSONERR_DEEP_NEST;break;case DocumentTooLarge: sz = JSONERR_DOC_LARGE;break;case GarbageAtEnd: sz = JSONERR_GARBAGEEND;break;}#ifndef QT_BOOTSTRAPPEDreturnQCoreApplication::translate("QJsonParseError", sz);#elsereturnQLatin1StringView(sz);#endif}using namespace QJsonPrivate;class StashedContainer {Q_DISABLE_COPY_MOVE(StashedContainer)public:StashedContainer(QExplicitlySharedDataPointer<QCborContainerPrivate> *container,QCborValue::Type type):type(type),stashed(std::move(*container)){} QCborValue intoValue(QExplicitlySharedDataPointer<QCborContainerPrivate> *parent){std::swap(stashed, *parent);returnQCborContainerPrivate::makeValue(type, -1, stashed.take(),QCborContainerPrivate::MoveContainer);}private:QCborValue::Type type; QExplicitlySharedDataPointer<QCborContainerPrivate> stashed;};Parser::Parser(const char*json,int length):head(json),json(json),nestingLevel(0),lastError(QJsonParseError::NoError){ end = json + length;}/*begin-array = ws %x5B ws ; [ left square bracketbegin-object = ws %x7B ws ; { left curly bracketend-array = ws %x5D ws ; ] right square bracketend-object = ws %x7D ws ; } right curly bracketname-separator = ws %x3A ws ; : colonvalue-separator = ws %x2C ws ; , commaInsignificant whitespace is allowed before or after any of the sixstructural characters.ws = *( %x20 / ; Space %x09 / ; Horizontal tab %x0A / ; Line feed or New line %x0D ; Carriage return )*/enum{ Space =0x20, Tab =0x09, LineFeed =0x0a, Return =0x0d, BeginArray =0x5b, BeginObject =0x7b, EndArray =0x5d, EndObject =0x7d, NameSeparator =0x3a, ValueSeparator =0x2c, Quote =0x22};voidParser::eatBOM(){// eat UTF-8 byte order mark uchar utf8bom[3] = {0xef,0xbb,0xbf};if(end - json >3&&(uchar)json[0] == utf8bom[0] &&(uchar)json[1] == utf8bom[1] &&(uchar)json[2] == utf8bom[2]) json +=3;}boolParser::eatSpace(){while(json < end) {if(*json > Space)break;if(*json != Space &&*json != Tab &&*json != LineFeed &&*json != Return)break;++json;}return(json < end);}charParser::nextToken(){if(!eatSpace())return0;char token = *json++;switch(token) {case BeginArray:case BeginObject:case NameSeparator:case ValueSeparator:case EndArray:case EndObject:case Quote:break;default: token =0;break;}return token;}/* JSON-text = object / array*/ QCborValue Parser::parse(QJsonParseError *error){eatBOM();char token; QCborValue value;if(!eatSpace()) { lastError =QJsonParseError::IllegalValue;goto error;} token = *json;if(token == Quote) { container =new QCborContainerPrivate; json++;if(!parseString())goto error; value =QCborContainerPrivate::makeValue(QCborValue::String,0, container.take(),QCborContainerPrivate::MoveContainer);}else{ value =parseValue();if(value.isUndefined())goto error;}eatSpace();if(json < end) { lastError =QJsonParseError::GarbageAtEnd;goto error;}{if(error) { error->offset =0; error->error =QJsonParseError::NoError;}return value;} error: container.reset();if(error) { error->offset = json - head; error->error = lastError;}returnQCborValue();}// We need to retain the _last_ value for any duplicate keys and we need to deref containers.// Therefore the manual implementation of std::unique().template<typename Iterator, typename Compare, typename Assign>static Iterator customAssigningUniqueLast(Iterator first, Iterator last, Compare compare, Assign assign){ first =std::adjacent_find(first, last, compare);if(first == last)return last;// After adjacent_find, we know that *first and *(first+1) compare equal,// and that first+1 != last. Iterator result = first++;Q_ASSERT(compare(*result, *first));assign(*result, *first);Q_ASSERT(first != last);while(++first != last) {if(!compare(*result, *first))++result;// Due to adjacent_find above, we know that we've at least eliminated one element.// Therefore we have to move each further element across the gap.Q_ASSERT(result != first);// We have to overwrite each element we want to eliminate, to deref() the container.// Therefore we don't try to optimize the number of assignments here.assign(*result, *first);}return++result;}static voidsortContainer(QCborContainerPrivate *container){using Forward =QJsonPrivate::KeyIterator;using Value =Forward::value_type;auto compare = [container](const Value &a,const Value &b){constauto&aKey = a.key();constauto&bKey = b.key();Q_ASSERT(aKey.flags &QtCbor::Element::HasByteData);Q_ASSERT(bKey.flags &QtCbor::Element::HasByteData);constQtCbor::ByteData *aData = container->byteData(aKey);constQtCbor::ByteData *bData = container->byteData(bKey);if(!aData)return bData ? -1:0;if(!bData)return1;// US-ASCII (StringIsAscii flag) is just a special case of UTF-8// string, so we can safely ignore the flag.if(aKey.flags &QtCbor::Element::StringIsUtf16) {if(bKey.flags &QtCbor::Element::StringIsUtf16)returnQtPrivate::compareStrings(aData->asStringView(), bData->asStringView());return-QCborContainerPrivate::compareUtf8(bData, aData->asStringView());}else{if(bKey.flags &QtCbor::Element::StringIsUtf16)returnQCborContainerPrivate::compareUtf8(aData, bData->asStringView());returnQtPrivate::compareStrings(aData->asUtf8StringView(), bData->asUtf8StringView());}};// The elements' containers are owned by the outer container, not by the elements themselves.auto move = [](Forward::reference target,Forward::reference source){QtCbor::Element &targetValue = target.value();// If the target has a container, deref it before overwriting, so that we don't leak.if(targetValue.flags &QtCbor::Element::IsContainer) targetValue.container->deref();// Do not move, so that we can clear the value afterwards. target = source;// Clear the source value, so that we don't store the same container twice. source.value() =QtCbor::Element();};std::stable_sort(Forward(container->elements.begin()),Forward(container->elements.end()),[&compare](const Value &a,const Value &b) {returncompare(a, b) <0; }); Forward result =customAssigningUniqueLast(Forward(container->elements.begin()),Forward(container->elements.end()),[&compare](const Value &a,const Value &b) {returncompare(a, b) ==0; }, move); container->elements.erase(result.elementsIterator(), container->elements.end());}boolParser::parseValueIntoContainer(){ QCborValue value =parseValue();switch(value.type()) {caseQCborValue::Undefined:return false;// error while parsingcaseQCborValue::String:break;// strings were already addeddefault: container->append(std::move(value));}return true;}/* object = begin-object [ member *( value-separator member ) ] end-object*/boolParser::parseObject(){if(++nestingLevel > nestingLimit) { lastError =QJsonParseError::DeepNesting;return false;}char token =nextToken();while(token == Quote) {if(!container) container =new QCborContainerPrivate;if(!parseMember())return false; token =nextToken();if(token != ValueSeparator)break; token =nextToken();if(token == EndObject) { lastError =QJsonParseError::MissingObject;return false;}}if(token != EndObject) { lastError =QJsonParseError::UnterminatedObject;return false;}--nestingLevel;if(container)sortContainer(container.data());return true;}/* member = string name-separator value*/boolParser::parseMember(){if(!parseString())return false;char token =nextToken();if(token != NameSeparator) { lastError =QJsonParseError::MissingNameSeparator;return false;}if(!eatSpace()) { lastError =QJsonParseError::UnterminatedObject;return false;}returnparseValueIntoContainer();}/* array = begin-array [ value *( value-separator value ) ] end-array*/boolParser::parseArray(){if(++nestingLevel > nestingLimit) { lastError =QJsonParseError::DeepNesting;return false;}if(!eatSpace()) { lastError =QJsonParseError::UnterminatedArray;return false;}if(*json == EndArray) {nextToken();}else{while(1) {if(!eatSpace()) { lastError =QJsonParseError::UnterminatedArray;return false;}if(!container) container =new QCborContainerPrivate;if(!parseValueIntoContainer())return false;char token =nextToken();if(token == EndArray)break;else if(token != ValueSeparator) {if(!eatSpace()) lastError =QJsonParseError::UnterminatedArray;else lastError =QJsonParseError::MissingValueSeparator;return false;}}}--nestingLevel;return true;}/*value = false / null / true / object / array / number / string*/ QCborValue Parser::parseValue(){switch(*json++) {case'n':if(end - json <3) { lastError =QJsonParseError::IllegalValue;returnQCborValue();}if(*json++ =='u'&&*json++ =='l'&&*json++ =='l') {returnQCborValue(QCborValue::Null);} lastError =QJsonParseError::IllegalValue;returnQCborValue();case't':if(end - json <3) { lastError =QJsonParseError::IllegalValue;returnQCborValue();}if(*json++ =='r'&&*json++ =='u'&&*json++ =='e') {returnQCborValue(true);} lastError =QJsonParseError::IllegalValue;returnQCborValue();case'f':if(end - json <4) { lastError =QJsonParseError::IllegalValue;returnQCborValue();}if(*json++ =='a'&&*json++ =='l'&&*json++ =='s'&&*json++ =='e') {returnQCborValue(false);} lastError =QJsonParseError::IllegalValue;returnQCborValue();case Quote: {if(parseString())// strings are already added to the container// callers must check for this typereturnQCborValue(QCborValue::String);returnQCborValue();}case BeginArray: { StashedContainer stashedContainer(&container,QCborValue::Array);if(parseArray())return stashedContainer.intoValue(&container);returnQCborValue();}case BeginObject: { StashedContainer stashedContainer(&container,QCborValue::Map);if(parseObject())return stashedContainer.intoValue(&container);returnQCborValue();}case ValueSeparator:// Essentially missing value, but after a colon, not after a comma// like the other MissingObject errors. lastError =QJsonParseError::IllegalValue;returnQCborValue();case EndObject:case EndArray: lastError =QJsonParseError::MissingObject;returnQCborValue();default:--json;returnparseNumber();}}/* number = [ minus ] int [ frac ] [ exp ] decimal-point = %x2E ; . digit1-9 = %x31-39 ; 1-9 e = %x65 / %x45 ; e E exp = e [ minus / plus ] 1*DIGIT frac = decimal-point 1*DIGIT int = zero / ( digit1-9 *DIGIT ) minus = %x2D ; - plus = %x2B ; + zero = %x30 ; 0*/ QCborValue Parser::parseNumber(){const char*start = json;bool isInt =true;// minusif(json < end && *json =='-')++json;// int = zero / ( digit1-9 *DIGIT )if(json < end && *json =='0') {++json;}else{while(json < end &&isAsciiDigit(*json))++json;}// frac = decimal-point 1*DIGITif(json < end && *json =='.') {++json;while(json < end &&isAsciiDigit(*json)) { isInt = isInt && *json =='0';++json;}}// exp = e [ minus / plus ] 1*DIGITif(json < end && (*json =='e'|| *json =='E')) { isInt =false;++json;if(json < end && (*json =='-'|| *json =='+'))++json;while(json < end &&isAsciiDigit(*json))++json;}const QByteArray number =QByteArray::fromRawData(start, json - start);if(isInt) {bool ok; qlonglong n = number.toLongLong(&ok);if(ok) {returnQCborValue(n);}}bool ok;double d = number.toDouble(&ok);if(!ok) { lastError =QJsonParseError::IllegalNumber;returnQCborValue();} qint64 n;if(convertDoubleTo(d, &n))returnQCborValue(n);returnQCborValue(d);}/* string = quotation-mark *char quotation-mark char = unescaped / escape ( %x22 / ; " quotation mark U+0022 %x5C / ; \ reverse solidus U+005C %x2F / ; / solidus U+002F %x62 / ; b backspace U+0008 %x66 / ; f form feed U+000C %x6E / ; n line feed U+000A %x72 / ; r carriage return U+000D %x74 / ; t tab U+0009 %x75 4HEXDIG ) ; uXXXX U+XXXX escape = %x5C ; \ quotation-mark = %x22 ; " unescaped = %x20-21 / %x23-5B / %x5D-10FFFF */staticinlinebooladdHexDigit(char digit, char32_t *result){*result <<=4;const int h =fromHex(digit);if(h != -1) {*result |= h;return true;}return false;}staticinlineboolscanEscapeSequence(const char*&json,const char*end, char32_t *ch){++json;if(json >= end)return false; uchar escaped = *json++;switch(escaped) {case'"':*ch ='"';break;case'\\':*ch ='\\';break;case'/':*ch ='/';break;case'b':*ch =0x8;break;case'f':*ch =0xc;break;case'n':*ch =0xa;break;case'r':*ch =0xd;break;case't':*ch =0x9;break;case'u': {*ch =0;if(json > end -4)return false;for(int i =0; i <4; ++i) {if(!addHexDigit(*json, ch))return false;++json;}return true;}default:// this is not as strict as one could be, but allows for more Json files// to be parsed correctly.*ch = escaped;return true;}return true;}staticinlineboolscanUtf8Char(const char*&json,const char*end, char32_t *result){constauto*usrc =reinterpret_cast<const uchar *>(json);constauto*uend =reinterpret_cast<const uchar *>(end);const uchar b = *usrc++; qsizetype res =QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, result, usrc, uend);if(res <0)return false; json =reinterpret_cast<const char*>(usrc);return true;}boolParser::parseString(){const char*start = json;// try to parse a utf-8 string without escape sequences, and note whether it's 7bit ASCII.bool isUtf8 =true;bool isAscii =true;while(json < end) { char32_t ch =0;if(*json =='"')break;if(*json =='\\') { isAscii =false;// If we find escape sequences, we store UTF-16 as there are some// escape sequences which are hard to represent in UTF-8.// (plain "\\ud800" for example) isUtf8 =false;break;}if(!scanUtf8Char(json, end, &ch)) { lastError =QJsonParseError::IllegalUTF8String;return false;}if(ch >0x7f) isAscii =false;}++json;if(json > end) { lastError =QJsonParseError::UnterminatedString;return false;}// no escape sequences, we are doneif(isUtf8) {if(isAscii) container->appendAsciiString(start, json - start -1);else container->appendUtf8String(start, json - start -1);return true;} json = start; QString ucs4;while(json < end) { char32_t ch =0;if(*json =='"')break;else if(*json =='\\') {if(!scanEscapeSequence(json, end, &ch)) { lastError =QJsonParseError::IllegalEscapeSequence;return false;}}else{if(!scanUtf8Char(json, end, &ch)) { lastError =QJsonParseError::IllegalUTF8String;return false;}} ucs4.append(QChar::fromUcs4(ch));}++json;if(json > end) { lastError =QJsonParseError::UnterminatedString;return false;} container->appendByteData(reinterpret_cast<const char*>(ucs4.constData()), ucs4.size() *2,QCborValue::String,QtCbor::Element::StringIsUtf16);return true;} QT_END_NAMESPACE
|