reader.h 92 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246
  1. // Tencent is pleased to support the open source community by making RapidJSON available.
  2. //
  3. // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
  4. //
  5. // Licensed under the MIT License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // http://opensource.org/licenses/MIT
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #ifndef RAPIDJSON_READER_H_
  15. #define RAPIDJSON_READER_H_
  16. /*! \file reader.h */
  17. #include "allocators.h"
  18. #include "stream.h"
  19. #include "encodedstream.h"
  20. #include "internal/clzll.h"
  21. #include "internal/meta.h"
  22. #include "internal/stack.h"
  23. #include "internal/strtod.h"
  24. #include <limits>
  25. #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
  26. #include <intrin.h>
  27. #pragma intrinsic(_BitScanForward)
  28. #endif
  29. #ifdef RAPIDJSON_SSE42
  30. #include <nmmintrin.h>
  31. #elif defined(RAPIDJSON_SSE2)
  32. #include <emmintrin.h>
  33. #elif defined(RAPIDJSON_NEON)
  34. #include <arm_neon.h>
  35. #endif
  36. #ifdef __clang__
  37. RAPIDJSON_DIAG_PUSH
  38. RAPIDJSON_DIAG_OFF(old-style-cast)
  39. RAPIDJSON_DIAG_OFF(padded)
  40. RAPIDJSON_DIAG_OFF(switch-enum)
  41. #elif defined(_MSC_VER)
  42. RAPIDJSON_DIAG_PUSH
  43. RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
  44. RAPIDJSON_DIAG_OFF(4702) // unreachable code
  45. #endif
  46. #ifdef __GNUC__
  47. RAPIDJSON_DIAG_PUSH
  48. RAPIDJSON_DIAG_OFF(effc++)
  49. #endif
  50. //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
  // Expands to no tokens. Passed as the "value" argument of
  // RAPIDJSON_PARSE_ERROR_EARLY_RETURN below so that the expansion is a bare `return;`.
  51. #define RAPIDJSON_NOTHING /* deliberately empty */
  // Leaves the enclosing function with `return value;` when HasParseError() is true.
  // The #ifndef guard keeps any definition that was supplied before this point.
  52. #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
  53. #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
  54. RAPIDJSON_MULTILINEMACRO_BEGIN \
  55. if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
  56. RAPIDJSON_MULTILINEMACRO_END
  57. #endif
  // Variant of the macro above for functions returning void.
  58. #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
  59. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
  60. //!@endcond
  61. /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
  62. \ingroup RAPIDJSON_ERRORS
  63. \brief Macro to indicate a parse error.
  64. \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
  65. \param offset position of the error in JSON input (\c size_t)
  66. This macro can be used as a customization point for the internal
  67. error handling mechanism of RapidJSON.
  68. A common usage model is to throw an exception instead of requiring the
  69. caller to explicitly check the \ref rapidjson::GenericReader::Parse's
  70. return value:
  71. \code
  72. #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
  73. throw ParseException(parseErrorCode, #parseErrorCode, offset)
  74. #include <stdexcept> // std::runtime_error
  75. #include "rapidjson/error/error.h" // rapidjson::ParseResult
  76. struct ParseException : std::runtime_error, rapidjson::ParseResult {
  77. ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
  78. : std::runtime_error(msg), ParseResult(code, offset) {}
  79. };
  80. #include "rapidjson/reader.h"
  81. \endcode
  82. \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
  83. */
  // Default definition, used only when no definition was supplied before this point.
  // It asserts that no error has been stored yet and then stores the code and offset
  // through SetParseError(); it does not itself leave the calling function.
  84. #ifndef RAPIDJSON_PARSE_ERROR_NORETURN
  85. #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
  86. RAPIDJSON_MULTILINEMACRO_BEGIN \
  87. RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
  88. SetParseError(parseErrorCode, offset); \
  89. RAPIDJSON_MULTILINEMACRO_END
  90. #endif
  91. /*! \def RAPIDJSON_PARSE_ERROR
  92. \ingroup RAPIDJSON_ERRORS
  93. \brief (Internal) macro to indicate and handle a parse error.
  94. \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
  95. \param offset position of the error in JSON input (\c size_t)
  96. Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
  97. \see RAPIDJSON_PARSE_ERROR_NORETURN
  98. \hideinitializer
  99. */
  // Default definition: record the error via RAPIDJSON_PARSE_ERROR_NORETURN, then
  // return from the enclosing function via RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID.
  // Because the second step expands to a bare `return;`, this form only fits
  // functions whose return type is void.
  100. #ifndef RAPIDJSON_PARSE_ERROR
  101. #define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
  102. RAPIDJSON_MULTILINEMACRO_BEGIN \
  103. RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
  104. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
  105. RAPIDJSON_MULTILINEMACRO_END
  106. #endif
  107. #include "error/error.h" // ParseErrorCode, ParseResult
  108. RAPIDJSON_NAMESPACE_BEGIN
  109. ///////////////////////////////////////////////////////////////////////////////
  110. // ParseFlag
  111. /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
  112. \ingroup RAPIDJSON_CONFIG
  113. \brief User-defined kParseDefaultFlags definition.
  114. User can define this as any \c ParseFlag combinations.
  115. */
  #ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
  #define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
  #endif

  //! Bit flags selecting optional parser behaviour; combine them with bitwise OR.
  /*! Every flag other than kParseNoFlags occupies its own bit.
      \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
  */
  enum ParseFlag {
      //! No flags are set.
      kParseNoFlags = 0,
      //! In-situ (destructive) parsing.
      kParseInsituFlag = 1 << 0,
      //! Validate encoding of JSON strings.
      kParseValidateEncodingFlag = 1 << 1,
      //! Iterative parsing (constant complexity in terms of function call stack size).
      kParseIterativeFlag = 1 << 2,
      //! After parsing a complete JSON root from the stream, stop processing the rest of
      //! the stream. With this flag the parser does not generate
      //! kParseErrorDocumentRootNotSingular.
      kParseStopWhenDoneFlag = 1 << 3,
      //! Parse numbers in full precision (but slower).
      kParseFullPrecisionFlag = 1 << 4,
      //! Allow one-line (//) and multi-line (/**/) comments.
      kParseCommentsFlag = 1 << 5,
      //! Parse all numbers (ints/doubles) as strings.
      kParseNumbersAsStringsFlag = 1 << 6,
      //! Allow trailing commas at the end of objects and arrays.
      kParseTrailingCommasFlag = 1 << 7,
      //! Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
      kParseNanAndInfFlag = 1 << 8,
      //! Allow escaped apostrophe in strings.
      kParseEscapedApostropheFlag = 1 << 9,
      //! Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS.
      kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS
  };
  136. ///////////////////////////////////////////////////////////////////////////////
  137. // Handler
  138. /*! \class rapidjson::Handler
  139. \brief Concept for receiving events from GenericReader upon parsing.
  140. The functions return true if no error occurs. If they return false,
  141. the event publisher should terminate the process.
  142. \code
  143. concept Handler {
  144. typename Ch;
  145. bool Null();
  146. bool Bool(bool b);
  147. bool Int(int i);
  148. bool Uint(unsigned i);
  149. bool Int64(int64_t i);
  150. bool Uint64(uint64_t i);
  151. bool Double(double d);
  152. /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
  153. bool RawNumber(const Ch* str, SizeType length, bool copy);
  154. bool String(const Ch* str, SizeType length, bool copy);
  155. bool StartObject();
  156. bool Key(const Ch* str, SizeType length, bool copy);
  157. bool EndObject(SizeType memberCount);
  158. bool StartArray();
  159. bool EndArray(SizeType elementCount);
  160. };
  161. \endcode
  162. */
  163. ///////////////////////////////////////////////////////////////////////////////
  164. // BaseReaderHandler
  165. //! Default implementation of Handler.
  166. /*! This can be used as base class of any reader handler.
  167. \note implements Handler concept
  168. */
  169. template<typename Encoding = UTF8<>, typename Derived = void>
  170. struct BaseReaderHandler {
  171. typedef typename Encoding::Ch Ch;
  172. typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
  173. bool Default() { return true; }
  174. bool Null() { return static_cast<Override&>(*this).Default(); }
  175. bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
  176. bool Int(int) { return static_cast<Override&>(*this).Default(); }
  177. bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
  178. bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
  179. bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
  180. bool Double(double) { return static_cast<Override&>(*this).Default(); }
  181. /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
  182. bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
  183. bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
  184. bool StartObject() { return static_cast<Override&>(*this).Default(); }
  185. bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
  186. bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
  187. bool StartArray() { return static_cast<Override&>(*this).Default(); }
  188. bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
  189. };
  190. ///////////////////////////////////////////////////////////////////////////////
  191. // StreamLocalCopy
  192. namespace internal {
  193. template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
  194. class StreamLocalCopy;
  195. //! Do copy optimization.
  196. template<typename Stream>
  197. class StreamLocalCopy<Stream, 1> {
  198. public:
  199. StreamLocalCopy(Stream& original) : s(original), original_(original) {}
  200. ~StreamLocalCopy() { original_ = s; }
  201. Stream s;
  202. private:
  203. StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
  204. Stream& original_;
  205. };
  206. //! Keep reference.
  207. template<typename Stream>
  208. class StreamLocalCopy<Stream, 0> {
  209. public:
  210. StreamLocalCopy(Stream& original) : s(original) {}
  211. Stream& s;
  212. private:
  213. StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
  214. };
  215. } // namespace internal
  216. ///////////////////////////////////////////////////////////////////////////////
  217. // SkipWhitespace
  218. //! Skip the JSON white spaces in a stream.
  219. /*! \param is A input stream for skipping white spaces.
  220. \note This function has SSE2/SSE4.2 specialization.
  221. */
  222. template<typename InputStream>
  223. void SkipWhitespace(InputStream& is) {
  224. internal::StreamLocalCopy<InputStream> copy(is);
  225. InputStream& s(copy.s);
  226. typename InputStream::Ch c;
  227. while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
  228. s.Take();
  229. }
  //! Skip JSON white spaces in the character range [p, end).
  /*! \param p   First character to examine.
      \param end One past the last character that may be read.
      \return Pointer to the first character that is not ' ', '\n', '\r' or '\t',
              or \c end when every character in the range is white space.
  */
  inline const char* SkipWhitespace(const char* p, const char* end) {
      for (; p != end; ++p) {
          switch (*p) {
          case ' ':
          case '\n':
          case '\r':
          case '\t':
              continue;   // still white space: advance to the next character
          default:
              return p;
          }
      }
      return p;
  }
  235. #ifdef RAPIDJSON_SSE42
  236. //! Skip whitespace with the SSE 4.2 pcmpistri instruction, testing 16 8-bit characters at once.
  // There is no end pointer: scanning stops only at the first byte that is not
  // ' ', '\n', '\r' or '\t', so the input must contain such a byte (a terminating
  // '\0' qualifies in the scalar loops below).
  // Returns a pointer to that first non-whitespace byte.
  237. inline const char *SkipWhitespace_SIMD(const char* p) {
  238. // Fast return for single non-whitespace
  239. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  240. ++p;
  241. else
  242. return p;
  243. // 16-byte align to the next boundary
  244. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  // Scalar scan of the unaligned head, so that the loop below starts on a 16-byte boundary.
  245. while (p != nextAligned)
  246. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  247. ++p;
  248. else
  249. return p;
  250. // The rest of string using SIMD
  // The four whitespace characters; the remaining bytes of the array are zero.
  251. static const char whitespace[16] = " \n\r\t";
  252. const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
  253. for (;; p += 16) {
  // Aligned load: p sits on a 16-byte boundary here because of the loop above.
  254. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  // r is the index of the first byte of s that is not in the whitespace set.
  // NOTE(review): relies on the implicit-length compare reporting a '\0' in s as a
  // non-matching position - confirm against the intrinsic's documentation.
  255. const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
  256. if (r != 16) // at least one of the 16 bytes is not whitespace
  257. return p + r;
  258. }
  259. }
  260. inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
  261. // Fast return for single non-whitespace
  262. if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
  263. ++p;
  264. else
  265. return p;
  266. // The middle of string using SIMD
  267. static const char whitespace[16] = " \n\r\t";
  268. const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
  269. for (; p <= end - 16; p += 16) {
  270. const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
  271. const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
  272. if (r != 16) // some of characters is non-whitespace
  273. return p + r;
  274. }
  275. return SkipWhitespace(p, end);
  276. }
  277. #elif defined(RAPIDJSON_SSE2)
  278. //! Skip whitespace with SSE2 instructions, testing 16 8-bit characters at once.
  // There is no end pointer: scanning stops only at the first byte that is not
  // ' ', '\n', '\r' or '\t', so the input must contain such a byte (a terminating
  // '\0' qualifies). Returns a pointer to that byte.
  279. inline const char *SkipWhitespace_SIMD(const char* p) {
  280. // Fast return for single non-whitespace
  281. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  282. ++p;
  283. else
  284. return p;
  285. // 16-byte align to the next boundary
  286. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  // Scalar scan of the unaligned head, so that the loop below starts on a 16-byte boundary.
  287. while (p != nextAligned)
  288. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  289. ++p;
  290. else
  291. return p;
  292. // The rest of string
  // One 16-byte row per whitespace character, each row filled with that character.
  293. #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
  294. static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
  295. #undef C16
  296. const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
  297. const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
  298. const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
  299. const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
  300. for (;; p += 16) {
  // Aligned load: p sits on a 16-byte boundary here because of the loop above.
  301. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  // x accumulates, per byte, whether that byte equals any of the four whitespace characters.
  302. __m128i x = _mm_cmpeq_epi8(s, w0);
  303. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
  304. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
  305. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
  // Inverted 16-bit mask: a set bit marks a byte that is not whitespace.
  306. unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
  307. if (r != 0) { // at least one of the 16 bytes is not whitespace
  308. #ifdef _MSC_VER // Find the index of first non-whitespace
  309. unsigned long offset;
  310. _BitScanForward(&offset, r);
  311. return p + offset;
  312. #else
  // __builtin_ffs yields a 1-based bit position, hence the -1.
  313. return p + __builtin_ffs(r) - 1;
  314. #endif
  315. }
  316. }
  317. }
  318. inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
  319. // Fast return for single non-whitespace
  320. if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
  321. ++p;
  322. else
  323. return p;
  324. // The rest of string
  325. #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
  326. static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
  327. #undef C16
  328. const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
  329. const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
  330. const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
  331. const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
  332. for (; p <= end - 16; p += 16) {
  333. const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
  334. __m128i x = _mm_cmpeq_epi8(s, w0);
  335. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
  336. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
  337. x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
  338. unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
  339. if (r != 0) { // some of characters may be non-whitespace
  340. #ifdef _MSC_VER // Find the index of first non-whitespace
  341. unsigned long offset;
  342. _BitScanForward(&offset, r);
  343. return p + offset;
  344. #else
  345. return p + __builtin_ffs(r) - 1;
  346. #endif
  347. }
  348. }
  349. return SkipWhitespace(p, end);
  350. }
  351. #elif defined(RAPIDJSON_NEON)
  352. //! Skip whitespace with ARM Neon instructions, testing 16 8-bit characters at once.
  // There is no end pointer: scanning stops only at the first byte that is not
  // ' ', '\n', '\r' or '\t', so the input must contain such a byte (a terminating
  // '\0' qualifies). Returns a pointer to that byte.
  353. inline const char *SkipWhitespace_SIMD(const char* p) {
  354. // Fast return for single non-whitespace
  355. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  356. ++p;
  357. else
  358. return p;
  359. // 16-byte align to the next boundary
  360. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  // Scalar scan of the unaligned head, so that the loop below starts on a 16-byte boundary.
  361. while (p != nextAligned)
  362. if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
  363. ++p;
  364. else
  365. return p;
  // One vector per whitespace character, with that character in all 16 lanes.
  366. const uint8x16_t w0 = vmovq_n_u8(' ');
  367. const uint8x16_t w1 = vmovq_n_u8('\n');
  368. const uint8x16_t w2 = vmovq_n_u8('\r');
  369. const uint8x16_t w3 = vmovq_n_u8('\t');
  370. for (;; p += 16) {
  371. const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
  // x accumulates, per byte, whether that byte equals any of the four whitespace characters.
  372. uint8x16_t x = vceqq_u8(s, w0);
  373. x = vorrq_u8(x, vceqq_u8(s, w1));
  374. x = vorrq_u8(x, vceqq_u8(s, w2));
  375. x = vorrq_u8(x, vceqq_u8(s, w3));
  // After negation a non-zero byte marks a non-whitespace character. The byte reversal
  // within each 64-bit half presumably puts the first byte in memory at the most
  // significant end, so that a leading-zero count finds it - TODO confirm lane order.
  376. x = vmvnq_u8(x); // Negate
  377. x = vrev64q_u8(x); // Rev in 64
  378. uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
  379. uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
  // low covers bytes 0-7 of the chunk, high covers bytes 8-15; when both are zero the
  // whole chunk is whitespace and the loop moves on to the next 16 bytes.
  380. if (low == 0) {
  381. if (high != 0) {
  // Leading zero bits divided by 8 gives the byte index within this half.
  382. uint32_t lz = internal::clzll(high);
  383. return p + 8 + (lz >> 3);
  384. }
  385. } else {
  386. uint32_t lz = internal::clzll(low);
  387. return p + (lz >> 3);
  388. }
  389. }
  390. }
  //! Skip whitespace in the bounded range [p, end) with ARM Neon instructions, 16 bytes at a time.
  /*! Full 16-byte chunks are tested with vector loads; the remaining tail (fewer than
      16 bytes) is handed to the scalar SkipWhitespace(p, end).
      \param p   First character to examine.
      \param end One past the last character that may be read.
      \return Pointer to the first character that is not ' ', '\n', '\r' or '\t',
              or \c end when the whole range is white space.
  */
  inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
      // Fast return for single non-whitespace
      if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
          ++p;
      else
          return p;

      // One vector per whitespace character, with that character in all 16 lanes.
      const uint8x16_t w0 = vmovq_n_u8(' ');
      const uint8x16_t w1 = vmovq_n_u8('\n');
      const uint8x16_t w2 = vmovq_n_u8('\r');
      const uint8x16_t w3 = vmovq_n_u8('\t');

      // Test the remaining length rather than comparing against `end - 16`: for ranges
      // shorter than 16 bytes that expression would form a pointer before the start of
      // the buffer, which is undefined behaviour.
      for (; end - p >= 16; p += 16) {
          const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
          uint8x16_t x = vceqq_u8(s, w0);
          x = vorrq_u8(x, vceqq_u8(s, w1));
          x = vorrq_u8(x, vceqq_u8(s, w2));
          x = vorrq_u8(x, vceqq_u8(s, w3));

          x = vmvnq_u8(x);                       // Negate: non-zero byte == non-whitespace
          x = vrev64q_u8(x);                     // Rev in 64
          uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0);   // bytes 0-7
          uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1);  // bytes 8-15

          // Leading zero bits divided by 8 gives the byte index within the half.
          if (low == 0) {
              if (high != 0) {
                  uint32_t lz = internal::clzll(high);
                  return p + 8 + (lz >> 3);
              }
          } else {
              uint32_t lz = internal::clzll(low);
              return p + (lz >> 3);
          }
      }

      // Fewer than 16 bytes left: finish with the scalar loop.
      return SkipWhitespace(p, end);
  }
  423. #endif // RAPIDJSON_NEON
  #ifdef RAPIDJSON_SIMD

  //! SkipWhitespace specialization for InsituStringStream: advance src_ with the SIMD scanner.
  template<> inline void SkipWhitespace(InsituStringStream& is) {
      const char* const firstNonSpace = SkipWhitespace_SIMD(is.src_);
      // The scanner works on const char*; cast back for the stream's non-const pointer.
      is.src_ = const_cast<char*>(firstNonSpace);
  }

  //! SkipWhitespace specialization for StringStream: advance src_ with the SIMD scanner.
  template<> inline void SkipWhitespace(StringStream& is) {
      const char* const firstNonSpace = SkipWhitespace_SIMD(is.src_);
      is.src_ = firstNonSpace;
  }

  //! SkipWhitespace specialization for a UTF-8 EncodedInputStream over MemoryStream.
  /*! Uses the bounded scanner, passing the wrapped stream's end_ as the limit. */
  template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
      const char* const firstNonSpace = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
      is.is_.src_ = firstNonSpace;
  }

  #endif // RAPIDJSON_SIMD
  437. ///////////////////////////////////////////////////////////////////////////////
  438. // GenericReader
  439. //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
  440. /*! GenericReader parses JSON text from a stream, and sends events synchronously to an
  441. object implementing Handler concept.
  442. It needs to allocate a stack for storing a single decoded string during
  443. non-destructive parsing.
  444. For in-situ parsing, the decoded string is directly written to the source
  445. text string, no temporary buffer is required.
  446. A GenericReader object can be reused for parsing multiple JSON texts.
  447. \tparam SourceEncoding Encoding of the input stream.
  448. \tparam TargetEncoding Encoding of the parse output.
  449. \tparam StackAllocator Allocator type for stack.
  450. */
  451. template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
  452. class GenericReader {
  453. public:
  454. typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
  455. //! Constructor.
  456. /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
  457. \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing)
  458. */
  459. GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) :
  460. stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {}
  461. //! Parse JSON text.
  462. /*! \tparam parseFlags Combination of \ref ParseFlag.
  463. \tparam InputStream Type of input stream, implementing Stream concept.
  464. \tparam Handler Type of handler, implementing Handler concept.
  465. \param is Input stream to be parsed.
  466. \param handler The handler to receive events.
  467. \return Whether the parsing is successful.
  468. */
  469. template <unsigned parseFlags, typename InputStream, typename Handler>
  470. ParseResult Parse(InputStream& is, Handler& handler) {
  471. if (parseFlags & kParseIterativeFlag)
  472. return IterativeParse<parseFlags>(is, handler);
  473. parseResult_.Clear();
  474. ClearStackOnExit scope(*this);
  475. SkipWhitespaceAndComments<parseFlags>(is);
  476. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  477. if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
  478. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
  479. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  480. }
  481. else {
  482. ParseValue<parseFlags>(is, handler);
  483. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  484. if (!(parseFlags & kParseStopWhenDoneFlag)) {
  485. SkipWhitespaceAndComments<parseFlags>(is);
  486. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  487. if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
  488. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
  489. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  490. }
  491. }
  492. }
  493. return parseResult_;
  494. }
  495. //! Parse JSON text (with \ref kParseDefaultFlags)
  496. /*! \tparam InputStream Type of input stream, implementing Stream concept
  497. \tparam Handler Type of handler, implementing Handler concept.
  498. \param is Input stream to be parsed.
  499. \param handler The handler to receive events.
  500. \return Whether the parsing is successful.
  501. */
  502. template <typename InputStream, typename Handler>
  503. ParseResult Parse(InputStream& is, Handler& handler) {
  504. return Parse<kParseDefaultFlags>(is, handler);
  505. }
  506. //! Initialize JSON text token-by-token parsing
  507. /*!
  508. */
  509. void IterativeParseInit() {
  510. parseResult_.Clear();
  511. state_ = IterativeParsingStartState;
  512. }
  513. //! Parse one token from JSON text
  514. /*! \tparam InputStream Type of input stream, implementing Stream concept
  515. \tparam Handler Type of handler, implementing Handler concept.
  516. \param is Input stream to be parsed.
  517. \param handler The handler to receive events.
  518. \return Whether the parsing is successful.
  519. */
  520. template <unsigned parseFlags, typename InputStream, typename Handler>
  521. bool IterativeParseNext(InputStream& is, Handler& handler) {
  522. while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
  523. SkipWhitespaceAndComments<parseFlags>(is);
  524. Token t = Tokenize(is.Peek());
  525. IterativeParsingState n = Predict(state_, t);
  526. IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
  527. // If we've finished or hit an error...
  528. if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
  529. // Report errors.
  530. if (d == IterativeParsingErrorState) {
  531. HandleError(state_, is);
  532. return false;
  533. }
  534. // Transition to the finish state.
  535. RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
  536. state_ = d;
  537. // If StopWhenDone is not set...
  538. if (!(parseFlags & kParseStopWhenDoneFlag)) {
  539. // ... and extra non-whitespace data is found...
  540. SkipWhitespaceAndComments<parseFlags>(is);
  541. if (is.Peek() != '\0') {
  542. // ... this is considered an error.
  543. HandleError(state_, is);
  544. return false;
  545. }
  546. }
  547. // Success! We are done!
  548. return true;
  549. }
  550. // Transition to the new state.
  551. state_ = d;
  552. // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
  553. if (!IsIterativeParsingDelimiterState(n))
  554. return true;
  555. }
  556. // We reached the end of file.
  557. stack_.Clear();
  558. if (state_ != IterativeParsingFinishState) {
  559. HandleError(state_, is);
  560. return false;
  561. }
  562. return true;
  563. }
  564. //! Check if token-by-token parsing JSON text is complete
  565. /*! \return Whether the JSON has been fully decoded.
  566. */
  567. RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
  568. return IsIterativeParsingCompleteState(state_);
  569. }
  570. //! Whether a parse error has occurred in the last parsing.
  571. bool HasParseError() const { return parseResult_.IsError(); }
  572. //! Get the \ref ParseErrorCode of last parsing.
  573. ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
  574. //! Get the position of last parsing error in input, 0 otherwise.
  575. size_t GetErrorOffset() const { return parseResult_.Offset(); }
  576. protected:
  577. void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
  578. private:
  579. // Prohibit copy constructor & assignment operator.
  580. GenericReader(const GenericReader&);
  581. GenericReader& operator=(const GenericReader&);
  582. void ClearStack() { stack_.Clear(); }
  583. // clear stack on any exit from ParseStream, e.g. due to exception
  584. struct ClearStackOnExit {
  585. explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
  586. ~ClearStackOnExit() { r_.ClearStack(); }
  587. private:
  588. GenericReader& r_;
  589. ClearStackOnExit(const ClearStackOnExit&);
  590. ClearStackOnExit& operator=(const ClearStackOnExit&);
  591. };
  592. template<unsigned parseFlags, typename InputStream>
  593. void SkipWhitespaceAndComments(InputStream& is) {
  594. SkipWhitespace(is);
  595. if (parseFlags & kParseCommentsFlag) {
  596. while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
  597. if (Consume(is, '*')) {
  598. while (true) {
  599. if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
  600. RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
  601. else if (Consume(is, '*')) {
  602. if (Consume(is, '/'))
  603. break;
  604. }
  605. else
  606. is.Take();
  607. }
  608. }
  609. else if (RAPIDJSON_LIKELY(Consume(is, '/')))
  610. while (is.Peek() != '\0' && is.Take() != '\n') {}
  611. else
  612. RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
  613. SkipWhitespace(is);
  614. }
  615. }
  616. }
  617. // Parse object: { string : value, ... }
  618. template<unsigned parseFlags, typename InputStream, typename Handler>
  619. void ParseObject(InputStream& is, Handler& handler) {
  620. RAPIDJSON_ASSERT(is.Peek() == '{');
  621. is.Take(); // Skip '{'
  622. if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
  623. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  624. SkipWhitespaceAndComments<parseFlags>(is);
  625. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  626. if (Consume(is, '}')) {
  627. if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
  628. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  629. return;
  630. }
  631. for (SizeType memberCount = 0;;) {
  632. if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
  633. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
  634. ParseString<parseFlags>(is, handler, true);
  635. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  636. SkipWhitespaceAndComments<parseFlags>(is);
  637. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  638. if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
  639. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
  640. SkipWhitespaceAndComments<parseFlags>(is);
  641. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  642. ParseValue<parseFlags>(is, handler);
  643. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  644. SkipWhitespaceAndComments<parseFlags>(is);
  645. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  646. ++memberCount;
  647. switch (is.Peek()) {
  648. case ',':
  649. is.Take();
  650. SkipWhitespaceAndComments<parseFlags>(is);
  651. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  652. break;
  653. case '}':
  654. is.Take();
  655. if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
  656. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  657. return;
  658. default:
  659. RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
  660. }
  661. if (parseFlags & kParseTrailingCommasFlag) {
  662. if (is.Peek() == '}') {
  663. if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
  664. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  665. is.Take();
  666. return;
  667. }
  668. }
  669. }
  670. }
  671. // Parse array: [ value, ... ]
  672. template<unsigned parseFlags, typename InputStream, typename Handler>
  673. void ParseArray(InputStream& is, Handler& handler) {
  674. RAPIDJSON_ASSERT(is.Peek() == '[');
  675. is.Take(); // Skip '['
  676. if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
  677. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  678. SkipWhitespaceAndComments<parseFlags>(is);
  679. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  680. if (Consume(is, ']')) {
  681. if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
  682. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  683. return;
  684. }
  685. for (SizeType elementCount = 0;;) {
  686. ParseValue<parseFlags>(is, handler);
  687. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  688. ++elementCount;
  689. SkipWhitespaceAndComments<parseFlags>(is);
  690. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  691. if (Consume(is, ',')) {
  692. SkipWhitespaceAndComments<parseFlags>(is);
  693. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  694. }
  695. else if (Consume(is, ']')) {
  696. if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
  697. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  698. return;
  699. }
  700. else
  701. RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
  702. if (parseFlags & kParseTrailingCommasFlag) {
  703. if (is.Peek() == ']') {
  704. if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
  705. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  706. is.Take();
  707. return;
  708. }
  709. }
  710. }
  711. }
  712. template<unsigned parseFlags, typename InputStream, typename Handler>
  713. void ParseNull(InputStream& is, Handler& handler) {
  714. RAPIDJSON_ASSERT(is.Peek() == 'n');
  715. is.Take();
  716. if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
  717. if (RAPIDJSON_UNLIKELY(!handler.Null()))
  718. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  719. }
  720. else
  721. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  722. }
  723. template<unsigned parseFlags, typename InputStream, typename Handler>
  724. void ParseTrue(InputStream& is, Handler& handler) {
  725. RAPIDJSON_ASSERT(is.Peek() == 't');
  726. is.Take();
  727. if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
  728. if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
  729. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  730. }
  731. else
  732. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  733. }
  734. template<unsigned parseFlags, typename InputStream, typename Handler>
  735. void ParseFalse(InputStream& is, Handler& handler) {
  736. RAPIDJSON_ASSERT(is.Peek() == 'f');
  737. is.Take();
  738. if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
  739. if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
  740. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
  741. }
  742. else
  743. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
  744. }
  745. template<typename InputStream>
  746. RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
  747. if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
  748. is.Take();
  749. return true;
  750. }
  751. else
  752. return false;
  753. }
  754. // Helper function to parse four hexadecimal digits in \uXXXX in ParseString().
  755. template<typename InputStream>
  756. unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
  757. unsigned codepoint = 0;
  758. for (int i = 0; i < 4; i++) {
  759. Ch c = is.Peek();
  760. codepoint <<= 4;
  761. codepoint += static_cast<unsigned>(c);
  762. if (c >= '0' && c <= '9')
  763. codepoint -= '0';
  764. else if (c >= 'A' && c <= 'F')
  765. codepoint -= 'A' - 10;
  766. else if (c >= 'a' && c <= 'f')
  767. codepoint -= 'a' - 10;
  768. else {
  769. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
  770. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
  771. }
  772. is.Take();
  773. }
  774. return codepoint;
  775. }
  776. template <typename CharType>
  777. class StackStream {
  778. public:
  779. typedef CharType Ch;
  780. StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
  781. RAPIDJSON_FORCEINLINE void Put(Ch c) {
  782. *stack_.template Push<Ch>() = c;
  783. ++length_;
  784. }
  785. RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
  786. length_ += count;
  787. return stack_.template Push<Ch>(count);
  788. }
  789. size_t Length() const { return length_; }
  790. Ch* Pop() {
  791. return stack_.template Pop<Ch>(length_);
  792. }
  793. private:
  794. StackStream(const StackStream&);
  795. StackStream& operator=(const StackStream&);
  796. internal::Stack<StackAllocator>& stack_;
  797. SizeType length_;
  798. };
  799. // Parse string and generate String event. Different code paths for kParseInsituFlag.
  800. template<unsigned parseFlags, typename InputStream, typename Handler>
  801. void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
  802. internal::StreamLocalCopy<InputStream> copy(is);
  803. InputStream& s(copy.s);
  804. RAPIDJSON_ASSERT(s.Peek() == '\"');
  805. s.Take(); // Skip '\"'
  806. bool success = false;
  807. if (parseFlags & kParseInsituFlag) {
  808. typename InputStream::Ch *head = s.PutBegin();
  809. ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
  810. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  811. size_t length = s.PutEnd(head) - 1;
  812. RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
  813. const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
  814. success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
  815. }
  816. else {
  817. StackStream<typename TargetEncoding::Ch> stackStream(stack_);
  818. ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
  819. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  820. SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
  821. const typename TargetEncoding::Ch* const str = stackStream.Pop();
  822. success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
  823. }
  824. if (RAPIDJSON_UNLIKELY(!success))
  825. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
  826. }
  827. // Parse string to an output is
  828. // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
  829. template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
  830. RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
  831. //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
  832. #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  833. static const char escape[256] = {
  834. Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/',
  835. Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
  836. 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
  837. 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  838. Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
  839. };
  840. #undef Z16
  841. //!@endcond
  842. for (;;) {
  843. // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
  844. if (!(parseFlags & kParseValidateEncodingFlag))
  845. ScanCopyUnescapedString(is, os);
  846. Ch c = is.Peek();
  847. if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
  848. size_t escapeOffset = is.Tell(); // For invalid escaping, report the initial '\\' as error offset
  849. is.Take();
  850. Ch e = is.Peek();
  851. if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
  852. is.Take();
  853. os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
  854. }
  855. else if ((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\'')) { // Allow escaped apostrophe
  856. is.Take();
  857. os.Put('\'');
  858. }
  859. else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
  860. is.Take();
  861. unsigned codepoint = ParseHex4(is, escapeOffset);
  862. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  863. if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
  864. // high surrogate, check if followed by valid low surrogate
  865. if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
  866. // Handle UTF-16 surrogate pair
  867. if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
  868. RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
  869. unsigned codepoint2 = ParseHex4(is, escapeOffset);
  870. RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
  871. if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
  872. RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
  873. codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
  874. }
  875. // single low surrogate
  876. else
  877. {
  878. RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
  879. }
  880. }
  881. TEncoding::Encode(os, codepoint);
  882. }
  883. else
  884. RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
  885. }
  886. else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
  887. is.Take();
  888. os.Put('\0'); // null-terminate the string
  889. return;
  890. }
  891. else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
  892. if (c == '\0')
  893. RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
  894. else
  895. RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
  896. }
  897. else {
  898. size_t offset = is.Tell();
  899. if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
  900. !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
  901. !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
  902. RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
  903. }
  904. }
  905. }
  906. template<typename InputStream, typename OutputStream>
  907. static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
  908. // Do nothing for generic version
  909. }
  910. #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
  911. // StringStream -> StackStream<char>
  912. static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
  913. const char* p = is.src_;
  914. // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
  915. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  916. while (p != nextAligned)
  917. if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
  918. is.src_ = p;
  919. return;
  920. }
  921. else
  922. os.Put(*p++);
  923. // The rest of string using SIMD
  924. static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
  925. static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
  926. static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
  927. const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
  928. const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
  929. const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
  930. for (;; p += 16) {
  931. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  932. const __m128i t1 = _mm_cmpeq_epi8(s, dq);
  933. const __m128i t2 = _mm_cmpeq_epi8(s, bs);
  934. const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
  935. const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
  936. unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
  937. if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
  938. SizeType length;
  939. #ifdef _MSC_VER // Find the index of first escaped
  940. unsigned long offset;
  941. _BitScanForward(&offset, r);
  942. length = offset;
  943. #else
  944. length = static_cast<SizeType>(__builtin_ffs(r) - 1);
  945. #endif
  946. if (length != 0) {
  947. char* q = reinterpret_cast<char*>(os.Push(length));
  948. for (size_t i = 0; i < length; i++)
  949. q[i] = p[i];
  950. p += length;
  951. }
  952. break;
  953. }
  954. _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
  955. }
  956. is.src_ = p;
  957. }
  958. // InsituStringStream -> InsituStringStream
  959. static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
  960. RAPIDJSON_ASSERT(&is == &os);
  961. (void)os;
  962. if (is.src_ == is.dst_) {
  963. SkipUnescapedString(is);
  964. return;
  965. }
  966. char* p = is.src_;
  967. char *q = is.dst_;
  968. // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
  969. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  970. while (p != nextAligned)
  971. if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
  972. is.src_ = p;
  973. is.dst_ = q;
  974. return;
  975. }
  976. else
  977. *q++ = *p++;
  978. // The rest of string using SIMD
  979. static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
  980. static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
  981. static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
  982. const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
  983. const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
  984. const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
  985. for (;; p += 16, q += 16) {
  986. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  987. const __m128i t1 = _mm_cmpeq_epi8(s, dq);
  988. const __m128i t2 = _mm_cmpeq_epi8(s, bs);
  989. const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
  990. const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
  991. unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
  992. if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
  993. size_t length;
  994. #ifdef _MSC_VER // Find the index of first escaped
  995. unsigned long offset;
  996. _BitScanForward(&offset, r);
  997. length = offset;
  998. #else
  999. length = static_cast<size_t>(__builtin_ffs(r) - 1);
  1000. #endif
  1001. for (const char* pend = p + length; p != pend; )
  1002. *q++ = *p++;
  1003. break;
  1004. }
  1005. _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
  1006. }
  1007. is.src_ = p;
  1008. is.dst_ = q;
  1009. }
  1010. // When read/write pointers are the same for insitu stream, just skip unescaped characters
  1011. static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
  1012. RAPIDJSON_ASSERT(is.src_ == is.dst_);
  1013. char* p = is.src_;
  1014. // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
  1015. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  1016. for (; p != nextAligned; p++)
  1017. if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
  1018. is.src_ = is.dst_ = p;
  1019. return;
  1020. }
  1021. // The rest of string using SIMD
  1022. static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
  1023. static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
  1024. static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
  1025. const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
  1026. const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
  1027. const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
  1028. for (;; p += 16) {
  1029. const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  1030. const __m128i t1 = _mm_cmpeq_epi8(s, dq);
  1031. const __m128i t2 = _mm_cmpeq_epi8(s, bs);
  1032. const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
  1033. const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
  1034. unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
  1035. if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
  1036. size_t length;
  1037. #ifdef _MSC_VER // Find the index of first escaped
  1038. unsigned long offset;
  1039. _BitScanForward(&offset, r);
  1040. length = offset;
  1041. #else
  1042. length = static_cast<size_t>(__builtin_ffs(r) - 1);
  1043. #endif
  1044. p += length;
  1045. break;
  1046. }
  1047. }
  1048. is.src_ = is.dst_ = p;
  1049. }
  1050. #elif defined(RAPIDJSON_NEON)
  1051. // StringStream -> StackStream<char>
  1052. static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
  1053. const char* p = is.src_;
  1054. // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
  1055. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  1056. while (p != nextAligned)
  1057. if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
  1058. is.src_ = p;
  1059. return;
  1060. }
  1061. else
  1062. os.Put(*p++);
  1063. // The rest of string using SIMD
  1064. const uint8x16_t s0 = vmovq_n_u8('"');
  1065. const uint8x16_t s1 = vmovq_n_u8('\\');
  1066. const uint8x16_t s2 = vmovq_n_u8('\b');
  1067. const uint8x16_t s3 = vmovq_n_u8(32);
  1068. for (;; p += 16) {
  1069. const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
  1070. uint8x16_t x = vceqq_u8(s, s0);
  1071. x = vorrq_u8(x, vceqq_u8(s, s1));
  1072. x = vorrq_u8(x, vceqq_u8(s, s2));
  1073. x = vorrq_u8(x, vcltq_u8(s, s3));
  1074. x = vrev64q_u8(x); // Rev in 64
  1075. uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
  1076. uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
  1077. SizeType length = 0;
  1078. bool escaped = false;
  1079. if (low == 0) {
  1080. if (high != 0) {
  1081. uint32_t lz = internal::clzll(high);
  1082. length = 8 + (lz >> 3);
  1083. escaped = true;
  1084. }
  1085. } else {
  1086. uint32_t lz = internal::clzll(low);
  1087. length = lz >> 3;
  1088. escaped = true;
  1089. }
  1090. if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped
  1091. if (length != 0) {
  1092. char* q = reinterpret_cast<char*>(os.Push(length));
  1093. for (size_t i = 0; i < length; i++)
  1094. q[i] = p[i];
  1095. p += length;
  1096. }
  1097. break;
  1098. }
  1099. vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);
  1100. }
  1101. is.src_ = p;
  1102. }
  1103. // InsituStringStream -> InsituStringStream
  1104. static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
  1105. RAPIDJSON_ASSERT(&is == &os);
  1106. (void)os;
  1107. if (is.src_ == is.dst_) {
  1108. SkipUnescapedString(is);
  1109. return;
  1110. }
  1111. char* p = is.src_;
  1112. char *q = is.dst_;
  1113. // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
  1114. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  1115. while (p != nextAligned)
  1116. if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
  1117. is.src_ = p;
  1118. is.dst_ = q;
  1119. return;
  1120. }
  1121. else
  1122. *q++ = *p++;
  1123. // The rest of string using SIMD
  1124. const uint8x16_t s0 = vmovq_n_u8('"');
  1125. const uint8x16_t s1 = vmovq_n_u8('\\');
  1126. const uint8x16_t s2 = vmovq_n_u8('\b');
  1127. const uint8x16_t s3 = vmovq_n_u8(32);
  1128. for (;; p += 16, q += 16) {
  1129. const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
  1130. uint8x16_t x = vceqq_u8(s, s0);
  1131. x = vorrq_u8(x, vceqq_u8(s, s1));
  1132. x = vorrq_u8(x, vceqq_u8(s, s2));
  1133. x = vorrq_u8(x, vcltq_u8(s, s3));
  1134. x = vrev64q_u8(x); // Rev in 64
  1135. uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
  1136. uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
  1137. SizeType length = 0;
  1138. bool escaped = false;
  1139. if (low == 0) {
  1140. if (high != 0) {
  1141. uint32_t lz = internal::clzll(high);
  1142. length = 8 + (lz >> 3);
  1143. escaped = true;
  1144. }
  1145. } else {
  1146. uint32_t lz = internal::clzll(low);
  1147. length = lz >> 3;
  1148. escaped = true;
  1149. }
  1150. if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped
  1151. for (const char* pend = p + length; p != pend; ) {
  1152. *q++ = *p++;
  1153. }
  1154. break;
  1155. }
  1156. vst1q_u8(reinterpret_cast<uint8_t *>(q), s);
  1157. }
  1158. is.src_ = p;
  1159. is.dst_ = q;
  1160. }
  1161. // When read/write pointers are the same for insitu stream, just skip unescaped characters
  1162. static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
  1163. RAPIDJSON_ASSERT(is.src_ == is.dst_);
  1164. char* p = is.src_;
  1165. // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
  1166. const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
  1167. for (; p != nextAligned; p++)
  1168. if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
  1169. is.src_ = is.dst_ = p;
  1170. return;
  1171. }
  1172. // The rest of string using SIMD
  1173. const uint8x16_t s0 = vmovq_n_u8('"');
  1174. const uint8x16_t s1 = vmovq_n_u8('\\');
  1175. const uint8x16_t s2 = vmovq_n_u8('\b');
  1176. const uint8x16_t s3 = vmovq_n_u8(32);
  1177. for (;; p += 16) {
  1178. const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
  1179. uint8x16_t x = vceqq_u8(s, s0);
  1180. x = vorrq_u8(x, vceqq_u8(s, s1));
  1181. x = vorrq_u8(x, vceqq_u8(s, s2));
  1182. x = vorrq_u8(x, vcltq_u8(s, s3));
  1183. x = vrev64q_u8(x); // Rev in 64
  1184. uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
  1185. uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
  1186. if (low == 0) {
  1187. if (high != 0) {
  1188. uint32_t lz = internal::clzll(high);
  1189. p += 8 + (lz >> 3);
  1190. break;
  1191. }
  1192. } else {
  1193. uint32_t lz = internal::clzll(low);
  1194. p += lz >> 3;
  1195. break;
  1196. }
  1197. }
  1198. is.src_ = is.dst_ = p;
  1199. }
  1200. #endif // RAPIDJSON_NEON
  1201. template<typename InputStream, typename StackCharacter, bool backup, bool pushOnTake>
  1202. class NumberStream;
  1203. template<typename InputStream, typename StackCharacter>
  1204. class NumberStream<InputStream, StackCharacter, false, false> {
  1205. public:
  1206. typedef typename InputStream::Ch Ch;
  1207. NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
  1208. RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
  1209. RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
  1210. RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
  1211. RAPIDJSON_FORCEINLINE void Push(char) {}
  1212. size_t Tell() { return is.Tell(); }
  1213. size_t Length() { return 0; }
  1214. const StackCharacter* Pop() { return 0; }
  1215. protected:
  1216. NumberStream& operator=(const NumberStream&);
  1217. InputStream& is;
  1218. };
  1219. template<typename InputStream, typename StackCharacter>
  1220. class NumberStream<InputStream, StackCharacter, true, false> : public NumberStream<InputStream, StackCharacter, false, false> {
  1221. typedef NumberStream<InputStream, StackCharacter, false, false> Base;
  1222. public:
  1223. NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s), stackStream(reader.stack_) {}
  1224. RAPIDJSON_FORCEINLINE Ch TakePush() {
  1225. stackStream.Put(static_cast<StackCharacter>(Base::is.Peek()));
  1226. return Base::is.Take();
  1227. }
  1228. RAPIDJSON_FORCEINLINE void Push(StackCharacter c) {
  1229. stackStream.Put(c);
  1230. }
  1231. size_t Length() { return stackStream.Length(); }
  1232. const StackCharacter* Pop() {
  1233. stackStream.Put('\0');
  1234. return stackStream.Pop();
  1235. }
  1236. private:
  1237. StackStream<StackCharacter> stackStream;
  1238. };
  1239. template<typename InputStream, typename StackCharacter>
  1240. class NumberStream<InputStream, StackCharacter, true, true> : public NumberStream<InputStream, StackCharacter, true, false> {
  1241. typedef NumberStream<InputStream, StackCharacter, true, false> Base;
  1242. public:
  1243. NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s) {}
  1244. RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
  1245. };
  //! Parse a JSON number from \c is and report it to \c handler.
  /*! Handles an optional '-', the integer part, an optional fraction and an optional exponent.
      "NaN", "Inf" and "Infinity" are accepted as well when kParseNanAndInfFlag is set.
      On success the value is delivered through one of handler.Int(), Uint(), Int64(), Uint64(),
      Double() or, with kParseNumbersAsStringsFlag, RawNumber().
      \tparam parseFlags Parse flags; the ones tested here are kParseNumbersAsStringsFlag,
              kParseInsituFlag, kParseFullPrecisionFlag and kParseNanAndInfFlag.
      \param is Input stream; expected to be positioned at the first character of the number.
      \param handler Receiver of the value; a false return is reported as kParseErrorTermination.
      Other errors reported: kParseErrorValueInvalid, kParseErrorNumberMissFraction,
      kParseErrorNumberMissExponent and kParseErrorNumberTooBig.
  */
  1246. template<unsigned parseFlags, typename InputStream, typename Handler>
  1247. void ParseNumber(InputStream& is, Handler& handler) {
  // Characters backed up on the stack are TargetEncoding::Ch when numbers are reported as strings, plain char otherwise.
  1248. typedef typename internal::SelectIf<internal::BoolType<(parseFlags & kParseNumbersAsStringsFlag) != 0>, typename TargetEncoding::Ch, char>::Type NumberCharacter;
  1249. internal::StreamLocalCopy<InputStream> copy(is);
  // NumberStream selection:
  //  - numbers as strings: back up characters (both on TakePush() and Take()) unless parsing in situ;
  //  - otherwise: back up characters (on TakePush() only) when full precision is requested.
  1250. NumberStream<InputStream, NumberCharacter,
  1251. ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
  1252. ((parseFlags & kParseInsituFlag) == 0) :
  1253. ((parseFlags & kParseFullPrecisionFlag) != 0),
  1254. (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
  1255. (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
  1256. size_t startOffset = s.Tell();
  1257. double d = 0.0;
  1258. bool useNanOrInf = false;
  1259. // Parse minus
  1260. bool minus = Consume(s, '-');
  1261. // Parse int: zero / ( digit1-9 *DIGIT )
  1262. unsigned i = 0; // magnitude while it still fits the 32-bit range checked below
  1263. uint64_t i64 = 0; // magnitude once 32 bits are no longer enough
  1264. bool use64bit = false;
  1265. int significandDigit = 0;
  1266. if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
  1267. i = 0;
  1268. s.TakePush();
  1269. }
  1270. else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
  1271. i = static_cast<unsigned>(s.TakePush() - '0');
  // Negative number: stay in 32 bits only while appending the next digit keeps the magnitude <= 2147483648.
  1272. if (minus)
  1273. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1274. if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
  1275. if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
  1276. i64 = i;
  1277. use64bit = true;
  1278. break;
  1279. }
  1280. }
  1281. i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1282. significandDigit++;
  1283. }
  // Non-negative number: stay in 32 bits only while appending the next digit keeps the value <= 4294967295.
  1284. else
  1285. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1286. if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
  1287. if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
  1288. i64 = i;
  1289. use64bit = true;
  1290. break;
  1291. }
  1292. }
  1293. i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1294. significandDigit++;
  1295. }
  1296. }
  1297. // Parse NaN or Infinity here
  1298. else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
  1299. if (Consume(s, 'N')) {
  1300. if (Consume(s, 'a') && Consume(s, 'N')) {
  1301. d = std::numeric_limits<double>::quiet_NaN();
  1302. useNanOrInf = true;
  1303. }
  1304. }
  1305. else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
  1306. if (Consume(s, 'n') && Consume(s, 'f')) {
  1307. d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
  1308. useNanOrInf = true;
  // "Inf" may be followed by "inity"; once an 'i' follows, the whole suffix is required.
  1309. if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
  1310. && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
  1311. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
  1312. }
  1313. }
  1314. }
  1315. if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
  1316. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
  1317. }
  1318. }
  1319. else
  1320. RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
  1321. // Parse 64bit int
  1322. bool useDouble = false;
  1323. if (use64bit) {
  // Same scheme as the 32-bit loops above: fall back to double when the next digit would exceed the 64-bit limit.
  1324. if (minus)
  1325. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1326. if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
  1327. if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
  1328. d = static_cast<double>(i64);
  1329. useDouble = true;
  1330. break;
  1331. }
  1332. i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1333. significandDigit++;
  1334. }
  1335. else
  1336. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1337. if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
  1338. if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
  1339. d = static_cast<double>(i64);
  1340. useDouble = true;
  1341. break;
  1342. }
  1343. i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1344. significandDigit++;
  1345. }
  1346. }
  1347. // Force double for big integer
  1348. if (useDouble) {
  1349. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1350. d = d * 10 + (s.TakePush() - '0');
  1351. }
  1352. }
  1353. // Parse frac = decimal-point 1*DIGIT
  1354. int expFrac = 0; // minus the number of fraction digits folded into the significand (never positive)
  1355. size_t decimalPosition; // assigned on both branches of the if/else below
  1356. if (!useNanOrInf && Consume(s, '.')) {
  1357. decimalPosition = s.Length();
  1358. if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
  1359. RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
  1360. if (!useDouble) {
  1361. #if RAPIDJSON_64BIT
  1362. // Use i64 to store significand in 64-bit architecture
  1363. if (!use64bit)
  1364. i64 = i;
  1365. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1366. if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
  1367. break;
  1368. else {
  1369. i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
  1370. --expFrac;
  1371. if (i64 != 0) // zeros before the first non-zero digit are not counted as significant
  1372. significandDigit++;
  1373. }
  1374. }
  1375. d = static_cast<double>(i64);
  1376. #else
  1377. // Use double to store significand in 32-bit architecture
  1378. d = static_cast<double>(use64bit ? i64 : i);
  1379. #endif
  1380. useDouble = true;
  1381. }
  // Remaining fraction digits: only the first 17 significant digits are accumulated into d;
  // later ones are consumed through TakePush() without changing d or expFrac.
  1382. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1383. if (significandDigit < 17) {
  1384. d = d * 10.0 + (s.TakePush() - '0');
  1385. --expFrac;
  1386. if (RAPIDJSON_LIKELY(d > 0.0))
  1387. significandDigit++;
  1388. }
  1389. else
  1390. s.TakePush();
  1391. }
  1392. }
  1393. else
  1394. decimalPosition = s.Length(); // decimal position at the end of integer.
  1395. // Parse exp = e [ minus / plus ] 1*DIGIT
  1396. int exp = 0;
  1397. if (!useNanOrInf && (Consume(s, 'e') || Consume(s, 'E'))) {
  1398. if (!useDouble) {
  1399. d = static_cast<double>(use64bit ? i64 : i);
  1400. useDouble = true;
  1401. }
  1402. bool expMinus = false;
  1403. if (Consume(s, '+'))
  1404. ;
  1405. else if (Consume(s, '-'))
  1406. expMinus = true;
  1407. if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1408. exp = static_cast<int>(s.Take() - '0');
  1409. if (expMinus) {
  1410. // (exp + expFrac) must not underflow int => we're detecting when -exp gets
  1411. // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into
  1412. // underflow territory):
  1413. //
  1414. // -(exp * 10 + 9) + expFrac >= INT_MIN
  1415. // <=> exp <= (expFrac - INT_MIN - 9) / 10
  1416. RAPIDJSON_ASSERT(expFrac <= 0);
  1417. int maxExp = (expFrac + 2147483639) / 10;
  1418. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1419. exp = exp * 10 + static_cast<int>(s.Take() - '0');
  1420. if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
  1421. while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
  1422. s.Take();
  1423. }
  1424. }
  1425. }
  1426. else { // positive exp
  1427. int maxExp = 308 - expFrac; // expFrac <= 0, so the limit is at least 308
  1428. while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
  1429. exp = exp * 10 + static_cast<int>(s.Take() - '0');
  1430. if (RAPIDJSON_UNLIKELY(exp > maxExp))
  1431. RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
  1432. }
  1433. }
  1434. }
  1435. else
  1436. RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
  1437. if (expMinus)
  1438. exp = -exp;
  1439. }
  1440. // Finish parsing, call event according to the type of number.
  1441. bool cont = true;
  1442. if (parseFlags & kParseNumbersAsStringsFlag) {
  1443. if (parseFlags & kParseInsituFlag) {
  1444. s.Pop(); // Pop stack no matter if it will be used or not.
  1445. typename InputStream::Ch* head = is.PutBegin();
  1446. const size_t length = s.Tell() - startOffset;
  1447. RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
  1448. // unable to insert the \0 character here, it will erase the comma after this number
  1449. const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
  1450. cont = handler.RawNumber(str, SizeType(length), false);
  1451. }
  1452. else {
  // Transcode the backed-up characters into a second, null-terminated stack buffer in TargetEncoding.
  1453. SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
  1454. GenericStringStream<UTF8<NumberCharacter> > srcStream(s.Pop());
  1455. StackStream<typename TargetEncoding::Ch> dstStream(stack_);
  1456. while (numCharsToCopy--) {
  1457. Transcoder<UTF8<typename TargetEncoding::Ch>, TargetEncoding>::Transcode(srcStream, dstStream);
  1458. }
  1459. dstStream.Put('\0');
  1460. const typename TargetEncoding::Ch* str = dstStream.Pop();
  1461. const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1; // exclude the terminating '\0'
  1462. cont = handler.RawNumber(str, SizeType(length), true);
  1463. }
  1464. }
  1465. else {
  1466. size_t length = s.Length();
  1467. const NumberCharacter* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
  1468. if (useDouble) {
  1469. int p = exp + expFrac; // decimal exponent to apply to the accumulated significand d
  1470. if (parseFlags & kParseFullPrecisionFlag)
  1471. d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
  1472. else
  1473. d = internal::StrtodNormalPrecision(d, p);
  1474. // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
  1475. if (d > (std::numeric_limits<double>::max)()) {
  1476. // Overflow
  1477. // TODO: internal::StrtodX should report overflow (or underflow)
  1478. RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
  1479. }
  1480. cont = handler.Double(minus ? -d : d);
  1481. }
  1482. else if (useNanOrInf) {
  1483. cont = handler.Double(d);
  1484. }
  1485. else {
  1486. if (use64bit) {
  1487. if (minus)
  1488. cont = handler.Int64(static_cast<int64_t>(~i64 + 1)); // ~x + 1 negates the magnitude in unsigned arithmetic
  1489. else
  1490. cont = handler.Uint64(i64);
  1491. }
  1492. else {
  1493. if (minus)
  1494. cont = handler.Int(static_cast<int32_t>(~i + 1)); // ~x + 1 negates the magnitude in unsigned arithmetic
  1495. else
  1496. cont = handler.Uint(i);
  1497. }
  1498. }
  1499. }
  1500. if (RAPIDJSON_UNLIKELY(!cont))
  1501. RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
  1502. }
  1503. // Parse any JSON value
  1504. template<unsigned parseFlags, typename InputStream, typename Handler>
  1505. void ParseValue(InputStream& is, Handler& handler) {
  1506. switch (is.Peek()) {
  1507. case 'n': ParseNull <parseFlags>(is, handler); break;
  1508. case 't': ParseTrue <parseFlags>(is, handler); break;
  1509. case 'f': ParseFalse <parseFlags>(is, handler); break;
  1510. case '"': ParseString<parseFlags>(is, handler); break;
  1511. case '{': ParseObject<parseFlags>(is, handler); break;
  1512. case '[': ParseArray <parseFlags>(is, handler); break;
  1513. default :
  1514. ParseNumber<parseFlags>(is, handler);
  1515. break;
  1516. }
  1517. }
  // Iterative Parsing

  //! States of the iterative parser.
  /*! The declaration order is relied upon: the two sink states come first and the three
      delimiter states come last, so each group can be recognised with a single comparison.
      The values also serve as row indices of the transition table in Predict().
  */
  enum IterativeParsingState {
      // Sink states (must stay at the top)
      IterativeParsingFinishState = 0,
      IterativeParsingErrorState,

      IterativeParsingStartState,

      // Object states
      IterativeParsingObjectInitialState,
      IterativeParsingMemberKeyState,
      IterativeParsingMemberValueState,
      IterativeParsingObjectFinishState,

      // Array states
      IterativeParsingArrayInitialState,
      IterativeParsingElementState,
      IterativeParsingArrayFinishState,

      // Single value state
      IterativeParsingValueState,

      // Delimiter states (must stay at the bottom)
      IterativeParsingElementDelimiterState,
      IterativeParsingMemberDelimiterState,
      IterativeParsingKeyValueDelimiterState,

      cIterativeParsingStateCount  //!< Number of states; not a state itself.
  };
  //! Lookahead token kinds of the iterative parser.
  /*! The values serve as column indices of the transition table in Predict(). */
  enum Token {
      LeftBracketToken = 0,    //!< '['
      RightBracketToken,       //!< ']'

      LeftCurlyBracketToken,   //!< '{'
      RightCurlyBracketToken,  //!< '}'

      CommaToken,              //!< ','
      ColonToken,              //!< ':'

      StringToken,             //!< '"'
      FalseToken,              //!< 'f'
      TrueToken,               //!< 't'
      NullToken,               //!< 'n'
      NumberToken,             //!< any other character

      kTokenCount              //!< Number of token kinds; not a token itself.
  };
  1556. RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
  1557. //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
  1558. #define N NumberToken
  1559. #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
  1560. // Maps from ASCII to Token
  1561. static const unsigned char tokenMap[256] = {
  1562. N16, // 00~0F
  1563. N16, // 10~1F
  1564. N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
  1565. N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
  1566. N16, // 40~4F
  1567. N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
  1568. N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
  1569. N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
  1570. N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
  1571. };
  1572. #undef N
  1573. #undef N16
  1574. //!@endcond
  1575. if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
  1576. return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
  1577. else
  1578. return NumberToken;
  1579. }
  1580. RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const {
  1581. // current state x one lookahead token -> new state
  1582. static const char G[cIterativeParsingStateCount][kTokenCount] = {
  1583. // Finish(sink state)
  1584. {
  1585. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1586. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1587. IterativeParsingErrorState
  1588. },
  1589. // Error(sink state)
  1590. {
  1591. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1592. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1593. IterativeParsingErrorState
  1594. },
  1595. // Start
  1596. {
  1597. IterativeParsingArrayInitialState, // Left bracket
  1598. IterativeParsingErrorState, // Right bracket
  1599. IterativeParsingObjectInitialState, // Left curly bracket
  1600. IterativeParsingErrorState, // Right curly bracket
  1601. IterativeParsingErrorState, // Comma
  1602. IterativeParsingErrorState, // Colon
  1603. IterativeParsingValueState, // String
  1604. IterativeParsingValueState, // False
  1605. IterativeParsingValueState, // True
  1606. IterativeParsingValueState, // Null
  1607. IterativeParsingValueState // Number
  1608. },
  1609. // ObjectInitial
  1610. {
  1611. IterativeParsingErrorState, // Left bracket
  1612. IterativeParsingErrorState, // Right bracket
  1613. IterativeParsingErrorState, // Left curly bracket
  1614. IterativeParsingObjectFinishState, // Right curly bracket
  1615. IterativeParsingErrorState, // Comma
  1616. IterativeParsingErrorState, // Colon
  1617. IterativeParsingMemberKeyState, // String
  1618. IterativeParsingErrorState, // False
  1619. IterativeParsingErrorState, // True
  1620. IterativeParsingErrorState, // Null
  1621. IterativeParsingErrorState // Number
  1622. },
  1623. // MemberKey
  1624. {
  1625. IterativeParsingErrorState, // Left bracket
  1626. IterativeParsingErrorState, // Right bracket
  1627. IterativeParsingErrorState, // Left curly bracket
  1628. IterativeParsingErrorState, // Right curly bracket
  1629. IterativeParsingErrorState, // Comma
  1630. IterativeParsingKeyValueDelimiterState, // Colon
  1631. IterativeParsingErrorState, // String
  1632. IterativeParsingErrorState, // False
  1633. IterativeParsingErrorState, // True
  1634. IterativeParsingErrorState, // Null
  1635. IterativeParsingErrorState // Number
  1636. },
  1637. // MemberValue
  1638. {
  1639. IterativeParsingErrorState, // Left bracket
  1640. IterativeParsingErrorState, // Right bracket
  1641. IterativeParsingErrorState, // Left curly bracket
  1642. IterativeParsingObjectFinishState, // Right curly bracket
  1643. IterativeParsingMemberDelimiterState, // Comma
  1644. IterativeParsingErrorState, // Colon
  1645. IterativeParsingErrorState, // String
  1646. IterativeParsingErrorState, // False
  1647. IterativeParsingErrorState, // True
  1648. IterativeParsingErrorState, // Null
  1649. IterativeParsingErrorState // Number
  1650. },
  1651. // ObjectFinish(sink state)
  1652. {
  1653. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1654. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1655. IterativeParsingErrorState
  1656. },
  1657. // ArrayInitial
  1658. {
  1659. IterativeParsingArrayInitialState, // Left bracket(push Element state)
  1660. IterativeParsingArrayFinishState, // Right bracket
  1661. IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
  1662. IterativeParsingErrorState, // Right curly bracket
  1663. IterativeParsingErrorState, // Comma
  1664. IterativeParsingErrorState, // Colon
  1665. IterativeParsingElementState, // String
  1666. IterativeParsingElementState, // False
  1667. IterativeParsingElementState, // True
  1668. IterativeParsingElementState, // Null
  1669. IterativeParsingElementState // Number
  1670. },
  1671. // Element
  1672. {
  1673. IterativeParsingErrorState, // Left bracket
  1674. IterativeParsingArrayFinishState, // Right bracket
  1675. IterativeParsingErrorState, // Left curly bracket
  1676. IterativeParsingErrorState, // Right curly bracket
  1677. IterativeParsingElementDelimiterState, // Comma
  1678. IterativeParsingErrorState, // Colon
  1679. IterativeParsingErrorState, // String
  1680. IterativeParsingErrorState, // False
  1681. IterativeParsingErrorState, // True
  1682. IterativeParsingErrorState, // Null
  1683. IterativeParsingErrorState // Number
  1684. },
  1685. // ArrayFinish(sink state)
  1686. {
  1687. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1688. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1689. IterativeParsingErrorState
  1690. },
  1691. // Single Value (sink state)
  1692. {
  1693. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1694. IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
  1695. IterativeParsingErrorState
  1696. },
  1697. // ElementDelimiter
  1698. {
  1699. IterativeParsingArrayInitialState, // Left bracket(push Element state)
  1700. IterativeParsingArrayFinishState, // Right bracket
  1701. IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
  1702. IterativeParsingErrorState, // Right curly bracket
  1703. IterativeParsingErrorState, // Comma
  1704. IterativeParsingErrorState, // Colon
  1705. IterativeParsingElementState, // String
  1706. IterativeParsingElementState, // False
  1707. IterativeParsingElementState, // True
  1708. IterativeParsingElementState, // Null
  1709. IterativeParsingElementState // Number
  1710. },
  1711. // MemberDelimiter
  1712. {
  1713. IterativeParsingErrorState, // Left bracket
  1714. IterativeParsingErrorState, // Right bracket
  1715. IterativeParsingErrorState, // Left curly bracket
  1716. IterativeParsingObjectFinishState, // Right curly bracket
  1717. IterativeParsingErrorState, // Comma
  1718. IterativeParsingErrorState, // Colon
  1719. IterativeParsingMemberKeyState, // String
  1720. IterativeParsingErrorState, // False
  1721. IterativeParsingErrorState, // True
  1722. IterativeParsingErrorState, // Null
  1723. IterativeParsingErrorState // Number
  1724. },
  1725. // KeyValueDelimiter
  1726. {
  1727. IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
  1728. IterativeParsingErrorState, // Right bracket
  1729. IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
  1730. IterativeParsingErrorState, // Right curly bracket
  1731. IterativeParsingErrorState, // Comma
  1732. IterativeParsingErrorState, // Colon
  1733. IterativeParsingMemberValueState, // String
  1734. IterativeParsingMemberValueState, // False
  1735. IterativeParsingMemberValueState, // True
  1736. IterativeParsingMemberValueState, // Null
  1737. IterativeParsingMemberValueState // Number
  1738. },
  1739. }; // End of G
  1740. return static_cast<IterativeParsingState>(G[state][token]);
  1741. }
  1742. // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
  1743. // May return a new state on state pop.
  1744. template <unsigned parseFlags, typename InputStream, typename Handler>
  1745. RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
  1746. (void)token;
  1747. switch (dst) {
  1748. case IterativeParsingErrorState:
  1749. return dst;
  1750. case IterativeParsingObjectInitialState:
  1751. case IterativeParsingArrayInitialState:
  1752. {
  1753. // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
  1754. // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
  1755. IterativeParsingState n = src;
  1756. if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
  1757. n = IterativeParsingElementState;
  1758. else if (src == IterativeParsingKeyValueDelimiterState)
  1759. n = IterativeParsingMemberValueState;
  1760. // Push current state.
  1761. *stack_.template Push<SizeType>(1) = n;
  1762. // Initialize and push the member/element count.
  1763. *stack_.template Push<SizeType>(1) = 0;
  1764. // Call handler
  1765. bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
  1766. // On handler short circuits the parsing.
  1767. if (!hr) {
  1768. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
  1769. return IterativeParsingErrorState;
  1770. }
  1771. else {
  1772. is.Take();
  1773. return dst;
  1774. }
  1775. }
  1776. case IterativeParsingMemberKeyState:
  1777. ParseString<parseFlags>(is, handler, true);
  1778. if (HasParseError())
  1779. return IterativeParsingErrorState;
  1780. else
  1781. return dst;
  1782. case IterativeParsingKeyValueDelimiterState:
  1783. RAPIDJSON_ASSERT(token == ColonToken);
  1784. is.Take();
  1785. return dst;
  1786. case IterativeParsingMemberValueState:
  1787. // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
  1788. ParseValue<parseFlags>(is, handler);
  1789. if (HasParseError()) {
  1790. return IterativeParsingErrorState;
  1791. }
  1792. return dst;
  1793. case IterativeParsingElementState:
  1794. // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
  1795. ParseValue<parseFlags>(is, handler);
  1796. if (HasParseError()) {
  1797. return IterativeParsingErrorState;
  1798. }
  1799. return dst;
  1800. case IterativeParsingMemberDelimiterState:
  1801. case IterativeParsingElementDelimiterState:
  1802. is.Take();
  1803. // Update member/element count.
  1804. *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
  1805. return dst;
  1806. case IterativeParsingObjectFinishState:
  1807. {
  1808. // Transit from delimiter is only allowed when trailing commas are enabled
  1809. if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
  1810. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
  1811. return IterativeParsingErrorState;
  1812. }
  1813. // Get member count.
  1814. SizeType c = *stack_.template Pop<SizeType>(1);
  1815. // If the object is not empty, count the last member.
  1816. if (src == IterativeParsingMemberValueState)
  1817. ++c;
  1818. // Restore the state.
  1819. IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
  1820. // Transit to Finish state if this is the topmost scope.
  1821. if (n == IterativeParsingStartState)
  1822. n = IterativeParsingFinishState;
  1823. // Call handler
  1824. bool hr = handler.EndObject(c);
  1825. // On handler short circuits the parsing.
  1826. if (!hr) {
  1827. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
  1828. return IterativeParsingErrorState;
  1829. }
  1830. else {
  1831. is.Take();
  1832. return n;
  1833. }
  1834. }
  1835. case IterativeParsingArrayFinishState:
  1836. {
  1837. // Transit from delimiter is only allowed when trailing commas are enabled
  1838. if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
  1839. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
  1840. return IterativeParsingErrorState;
  1841. }
  1842. // Get element count.
  1843. SizeType c = *stack_.template Pop<SizeType>(1);
  1844. // If the array is not empty, count the last element.
  1845. if (src == IterativeParsingElementState)
  1846. ++c;
  1847. // Restore the state.
  1848. IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
  1849. // Transit to Finish state if this is the topmost scope.
  1850. if (n == IterativeParsingStartState)
  1851. n = IterativeParsingFinishState;
  1852. // Call handler
  1853. bool hr = handler.EndArray(c);
  1854. // On handler short circuits the parsing.
  1855. if (!hr) {
  1856. RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
  1857. return IterativeParsingErrorState;
  1858. }
  1859. else {
  1860. is.Take();
  1861. return n;
  1862. }
  1863. }
  1864. default:
  1865. // This branch is for IterativeParsingValueState actually.
  1866. // Use `default:` rather than
  1867. // `case IterativeParsingValueState:` is for code coverage.
  1868. // The IterativeParsingStartState is not enumerated in this switch-case.
  1869. // It is impossible for that case. And it can be caught by following assertion.
  1870. // The IterativeParsingFinishState is not enumerated in this switch-case either.
  1871. // It is a "derivative" state which cannot triggered from Predict() directly.
  1872. // Therefore it cannot happen here. And it can be caught by following assertion.
  1873. RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
  1874. // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
  1875. ParseValue<parseFlags>(is, handler);
  1876. if (HasParseError()) {
  1877. return IterativeParsingErrorState;
  1878. }
  1879. return IterativeParsingFinishState;
  1880. }
  1881. }
  1882. template <typename InputStream>
  1883. void HandleError(IterativeParsingState src, InputStream& is) {
  1884. if (HasParseError()) {
  1885. // Error flag has been set.
  1886. return;
  1887. }
  1888. switch (src) {
  1889. case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
  1890. case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
  1891. case IterativeParsingObjectInitialState:
  1892. case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
  1893. case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
  1894. case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
  1895. case IterativeParsingKeyValueDelimiterState:
  1896. case IterativeParsingArrayInitialState:
  1897. case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
  1898. default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
  1899. }
  1900. }
  1901. RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
  1902. return s >= IterativeParsingElementDelimiterState;
  1903. }
  1904. RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
  1905. return s <= IterativeParsingErrorState;
  1906. }
  1907. template <unsigned parseFlags, typename InputStream, typename Handler>
  1908. ParseResult IterativeParse(InputStream& is, Handler& handler) {
  1909. parseResult_.Clear();
  1910. ClearStackOnExit scope(*this);
  1911. IterativeParsingState state = IterativeParsingStartState;
  1912. SkipWhitespaceAndComments<parseFlags>(is);
  1913. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  1914. while (is.Peek() != '\0') {
  1915. Token t = Tokenize(is.Peek());
  1916. IterativeParsingState n = Predict(state, t);
  1917. IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
  1918. if (d == IterativeParsingErrorState) {
  1919. HandleError(state, is);
  1920. break;
  1921. }
  1922. state = d;
  1923. // Do not further consume streams if a root JSON has been parsed.
  1924. if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
  1925. break;
  1926. SkipWhitespaceAndComments<parseFlags>(is);
  1927. RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
  1928. }
  1929. // Handle the end of file.
  1930. if (state != IterativeParsingFinishState)
  1931. HandleError(state, is);
  1932. return parseResult_;
  1933. }
  // Data members of the reader.
  1934. static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string.
  // Besides decoded strings, stack_ also holds the per-scope frames pushed by Transit()
  // (saved state and member/element count) and the characters backed up by NumberStream.
  1935. internal::Stack<StackAllocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing.
  1936. ParseResult parseResult_; //!< Result of the parse in progress; IterativeParse() clears it on entry and returns it.
  1937. IterativeParsingState state_; //!< NOTE(review): not referenced by the code visible in this part of the file; presumably the current state of a pull-style iterative parser - confirm.
  1938. }; // class GenericReader
  1939. //! Reader with UTF8 encoding and default allocator.
  1940. typedef GenericReader<UTF8<>, UTF8<> > Reader;
  1941. RAPIDJSON_NAMESPACE_END
  1942. #if defined(__clang__) || defined(_MSC_VER)
  1943. RAPIDJSON_DIAG_POP
  1944. #endif
  1945. #ifdef __GNUC__
  1946. RAPIDJSON_DIAG_POP
  1947. #endif
  1948. #endif // RAPIDJSON_READER_H_