uri.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. // Tencent is pleased to support the open source community by making RapidJSON available.
  2. //
  3. // (C) Copyright IBM Corporation 2021
  4. //
  5. // Licensed under the MIT License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // http://opensource.org/licenses/MIT
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  14. #ifndef RAPIDJSON_URI_H_
  15. #define RAPIDJSON_URI_H_
  16. #include "internal/strfunc.h"
  17. #if defined(__clang__)
  18. RAPIDJSON_DIAG_PUSH
  19. RAPIDJSON_DIAG_OFF(c++98-compat)
  20. #elif defined(_MSC_VER)
  21. RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
  22. #endif
  23. RAPIDJSON_NAMESPACE_BEGIN
  24. ///////////////////////////////////////////////////////////////////////////////
  25. // GenericUri
  26. template <typename ValueType, typename Allocator=CrtAllocator>
  27. class GenericUri {
  28. public:
  29. typedef typename ValueType::Ch Ch;
  30. #if RAPIDJSON_HAS_STDSTRING
  31. typedef std::basic_string<Ch> String;
  32. #endif
  33. //! Constructors
  34. GenericUri(Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
  35. }
  36. GenericUri(const Ch* uri, SizeType len, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
  37. Parse(uri, len);
  38. }
  39. GenericUri(const Ch* uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
  40. Parse(uri, internal::StrLen<Ch>(uri));
  41. }
  42. // Use with specializations of GenericValue
  43. template<typename T> GenericUri(const T& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
  44. const Ch* u = uri.template Get<const Ch*>(); // TypeHelper from document.h
  45. Parse(u, internal::StrLen<Ch>(u));
  46. }
  47. #if RAPIDJSON_HAS_STDSTRING
  48. GenericUri(const String& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
  49. Parse(uri.c_str(), internal::StrLen<Ch>(uri.c_str()));
  50. }
  51. #endif
  52. //! Copy constructor
  53. GenericUri(const GenericUri& rhs) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(), ownAllocator_() {
  54. *this = rhs;
  55. }
  56. //! Copy constructor
  57. GenericUri(const GenericUri& rhs, Allocator* allocator) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() {
  58. *this = rhs;
  59. }
  60. //! Destructor.
  61. ~GenericUri() {
  62. Free();
  63. RAPIDJSON_DELETE(ownAllocator_);
  64. }
  65. //! Assignment operator
  66. GenericUri& operator=(const GenericUri& rhs) {
  67. if (this != &rhs) {
  68. // Do not delete ownAllocator
  69. Free();
  70. Allocate(rhs.GetStringLength());
  71. auth_ = CopyPart(scheme_, rhs.scheme_, rhs.GetSchemeStringLength());
  72. path_ = CopyPart(auth_, rhs.auth_, rhs.GetAuthStringLength());
  73. query_ = CopyPart(path_, rhs.path_, rhs.GetPathStringLength());
  74. frag_ = CopyPart(query_, rhs.query_, rhs.GetQueryStringLength());
  75. base_ = CopyPart(frag_, rhs.frag_, rhs.GetFragStringLength());
  76. uri_ = CopyPart(base_, rhs.base_, rhs.GetBaseStringLength());
  77. CopyPart(uri_, rhs.uri_, rhs.GetStringLength());
  78. }
  79. return *this;
  80. }
  81. //! Getters
  82. // Use with specializations of GenericValue
  83. template<typename T> void Get(T& uri, Allocator& allocator) {
  84. uri.template Set<const Ch*>(this->GetString(), allocator); // TypeHelper from document.h
  85. }
  86. const Ch* GetString() const { return uri_; }
  87. SizeType GetStringLength() const { return uri_ == 0 ? 0 : internal::StrLen<Ch>(uri_); }
  88. const Ch* GetBaseString() const { return base_; }
  89. SizeType GetBaseStringLength() const { return base_ == 0 ? 0 : internal::StrLen<Ch>(base_); }
  90. const Ch* GetSchemeString() const { return scheme_; }
  91. SizeType GetSchemeStringLength() const { return scheme_ == 0 ? 0 : internal::StrLen<Ch>(scheme_); }
  92. const Ch* GetAuthString() const { return auth_; }
  93. SizeType GetAuthStringLength() const { return auth_ == 0 ? 0 : internal::StrLen<Ch>(auth_); }
  94. const Ch* GetPathString() const { return path_; }
  95. SizeType GetPathStringLength() const { return path_ == 0 ? 0 : internal::StrLen<Ch>(path_); }
  96. const Ch* GetQueryString() const { return query_; }
  97. SizeType GetQueryStringLength() const { return query_ == 0 ? 0 : internal::StrLen<Ch>(query_); }
  98. const Ch* GetFragString() const { return frag_; }
  99. SizeType GetFragStringLength() const { return frag_ == 0 ? 0 : internal::StrLen<Ch>(frag_); }
  100. #if RAPIDJSON_HAS_STDSTRING
  101. static String Get(const GenericUri& uri) { return String(uri.GetString(), uri.GetStringLength()); }
  102. static String GetBase(const GenericUri& uri) { return String(uri.GetBaseString(), uri.GetBaseStringLength()); }
  103. static String GetScheme(const GenericUri& uri) { return String(uri.GetSchemeString(), uri.GetSchemeStringLength()); }
  104. static String GetAuth(const GenericUri& uri) { return String(uri.GetAuthString(), uri.GetAuthStringLength()); }
  105. static String GetPath(const GenericUri& uri) { return String(uri.GetPathString(), uri.GetPathStringLength()); }
  106. static String GetQuery(const GenericUri& uri) { return String(uri.GetQueryString(), uri.GetQueryStringLength()); }
  107. static String GetFrag(const GenericUri& uri) { return String(uri.GetFragString(), uri.GetFragStringLength()); }
  108. #endif
  109. //! Equality operators
  110. bool operator==(const GenericUri& rhs) const {
  111. return Match(rhs, true);
  112. }
  113. bool operator!=(const GenericUri& rhs) const {
  114. return !Match(rhs, true);
  115. }
  116. bool Match(const GenericUri& uri, bool full = true) const {
  117. Ch* s1;
  118. Ch* s2;
  119. if (full) {
  120. s1 = uri_;
  121. s2 = uri.uri_;
  122. } else {
  123. s1 = base_;
  124. s2 = uri.base_;
  125. }
  126. if (s1 == s2) return true;
  127. if (s1 == 0 || s2 == 0) return false;
  128. return internal::StrCmp<Ch>(s1, s2) == 0;
  129. }
  130. //! Resolve this URI against another (base) URI in accordance with URI resolution rules.
  131. // See https://tools.ietf.org/html/rfc3986
  132. // Use for resolving an id or $ref with an in-scope id.
  133. // Returns a new GenericUri for the resolved URI.
  134. GenericUri Resolve(const GenericUri& baseuri, Allocator* allocator = 0) {
  135. GenericUri resuri;
  136. resuri.allocator_ = allocator;
  137. // Ensure enough space for combining paths
  138. resuri.Allocate(GetStringLength() + baseuri.GetStringLength() + 1); // + 1 for joining slash
  139. if (!(GetSchemeStringLength() == 0)) {
  140. // Use all of this URI
  141. resuri.auth_ = CopyPart(resuri.scheme_, scheme_, GetSchemeStringLength());
  142. resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
  143. resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
  144. resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
  145. resuri.RemoveDotSegments();
  146. } else {
  147. // Use the base scheme
  148. resuri.auth_ = CopyPart(resuri.scheme_, baseuri.scheme_, baseuri.GetSchemeStringLength());
  149. if (!(GetAuthStringLength() == 0)) {
  150. // Use this auth, path, query
  151. resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength());
  152. resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
  153. resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
  154. resuri.RemoveDotSegments();
  155. } else {
  156. // Use the base auth
  157. resuri.path_ = CopyPart(resuri.auth_, baseuri.auth_, baseuri.GetAuthStringLength());
  158. if (GetPathStringLength() == 0) {
  159. // Use the base path
  160. resuri.query_ = CopyPart(resuri.path_, baseuri.path_, baseuri.GetPathStringLength());
  161. if (GetQueryStringLength() == 0) {
  162. // Use the base query
  163. resuri.frag_ = CopyPart(resuri.query_, baseuri.query_, baseuri.GetQueryStringLength());
  164. } else {
  165. // Use this query
  166. resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
  167. }
  168. } else {
  169. if (path_[0] == '/') {
  170. // Absolute path - use all of this path
  171. resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength());
  172. resuri.RemoveDotSegments();
  173. } else {
  174. // Relative path - append this path to base path after base path's last slash
  175. size_t pos = 0;
  176. if (!(baseuri.GetAuthStringLength() == 0) && baseuri.GetPathStringLength() == 0) {
  177. resuri.path_[pos] = '/';
  178. pos++;
  179. }
  180. size_t lastslashpos = baseuri.GetPathStringLength();
  181. while (lastslashpos > 0) {
  182. if (baseuri.path_[lastslashpos - 1] == '/') break;
  183. lastslashpos--;
  184. }
  185. std::memcpy(&resuri.path_[pos], baseuri.path_, lastslashpos * sizeof(Ch));
  186. pos += lastslashpos;
  187. resuri.query_ = CopyPart(&resuri.path_[pos], path_, GetPathStringLength());
  188. resuri.RemoveDotSegments();
  189. }
  190. // Use this query
  191. resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength());
  192. }
  193. }
  194. }
  195. // Always use this frag
  196. resuri.base_ = CopyPart(resuri.frag_, frag_, GetFragStringLength());
  197. // Re-constitute base_ and uri_
  198. resuri.SetBase();
  199. resuri.uri_ = resuri.base_ + resuri.GetBaseStringLength() + 1;
  200. resuri.SetUri();
  201. return resuri;
  202. }
  203. //! Get the allocator of this GenericUri.
  204. Allocator& GetAllocator() { return *allocator_; }
  205. private:
  206. // Allocate memory for a URI
  207. // Returns total amount allocated
  208. std::size_t Allocate(std::size_t len) {
  209. // Create own allocator if user did not supply.
  210. if (!allocator_)
  211. ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
  212. // Allocate one block containing each part of the URI (5) plus base plus full URI, all null terminated.
  213. // Order: scheme, auth, path, query, frag, base, uri
  214. // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
  215. size_t total = (3 * len + 7) * sizeof(Ch);
  216. scheme_ = static_cast<Ch*>(allocator_->Malloc(total));
  217. *scheme_ = '\0';
  218. auth_ = scheme_;
  219. auth_++;
  220. *auth_ = '\0';
  221. path_ = auth_;
  222. path_++;
  223. *path_ = '\0';
  224. query_ = path_;
  225. query_++;
  226. *query_ = '\0';
  227. frag_ = query_;
  228. frag_++;
  229. *frag_ = '\0';
  230. base_ = frag_;
  231. base_++;
  232. *base_ = '\0';
  233. uri_ = base_;
  234. uri_++;
  235. *uri_ = '\0';
  236. return total;
  237. }
  238. // Free memory for a URI
  239. void Free() {
  240. if (scheme_) {
  241. Allocator::Free(scheme_);
  242. scheme_ = 0;
  243. }
  244. }
  245. // Parse a URI into constituent scheme, authority, path, query, & fragment parts
  246. // Supports URIs that match regex ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? as per
  247. // https://tools.ietf.org/html/rfc3986
  248. void Parse(const Ch* uri, std::size_t len) {
  249. std::size_t start = 0, pos1 = 0, pos2 = 0;
  250. Allocate(len);
  251. // Look for scheme ([^:/?#]+):)?
  252. if (start < len) {
  253. while (pos1 < len) {
  254. if (uri[pos1] == ':') break;
  255. pos1++;
  256. }
  257. if (pos1 != len) {
  258. while (pos2 < len) {
  259. if (uri[pos2] == '/') break;
  260. if (uri[pos2] == '?') break;
  261. if (uri[pos2] == '#') break;
  262. pos2++;
  263. }
  264. if (pos1 < pos2) {
  265. pos1++;
  266. std::memcpy(scheme_, &uri[start], pos1 * sizeof(Ch));
  267. scheme_[pos1] = '\0';
  268. start = pos1;
  269. }
  270. }
  271. }
  272. // Look for auth (//([^/?#]*))?
  273. // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
  274. auth_ = scheme_ + GetSchemeStringLength();
  275. auth_++;
  276. *auth_ = '\0';
  277. if (start < len - 1 && uri[start] == '/' && uri[start + 1] == '/') {
  278. pos2 = start + 2;
  279. while (pos2 < len) {
  280. if (uri[pos2] == '/') break;
  281. if (uri[pos2] == '?') break;
  282. if (uri[pos2] == '#') break;
  283. pos2++;
  284. }
  285. std::memcpy(auth_, &uri[start], (pos2 - start) * sizeof(Ch));
  286. auth_[pos2 - start] = '\0';
  287. start = pos2;
  288. }
  289. // Look for path ([^?#]*)
  290. // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
  291. path_ = auth_ + GetAuthStringLength();
  292. path_++;
  293. *path_ = '\0';
  294. if (start < len) {
  295. pos2 = start;
  296. while (pos2 < len) {
  297. if (uri[pos2] == '?') break;
  298. if (uri[pos2] == '#') break;
  299. pos2++;
  300. }
  301. if (start != pos2) {
  302. std::memcpy(path_, &uri[start], (pos2 - start) * sizeof(Ch));
  303. path_[pos2 - start] = '\0';
  304. if (path_[0] == '/')
  305. RemoveDotSegments(); // absolute path - normalize
  306. start = pos2;
  307. }
  308. }
  309. // Look for query (\?([^#]*))?
  310. // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
  311. query_ = path_ + GetPathStringLength();
  312. query_++;
  313. *query_ = '\0';
  314. if (start < len && uri[start] == '?') {
  315. pos2 = start + 1;
  316. while (pos2 < len) {
  317. if (uri[pos2] == '#') break;
  318. pos2++;
  319. }
  320. if (start != pos2) {
  321. std::memcpy(query_, &uri[start], (pos2 - start) * sizeof(Ch));
  322. query_[pos2 - start] = '\0';
  323. start = pos2;
  324. }
  325. }
  326. // Look for fragment (#(.*))?
  327. // Note need to set, increment, assign in 3 stages to avoid compiler warning bug.
  328. frag_ = query_ + GetQueryStringLength();
  329. frag_++;
  330. *frag_ = '\0';
  331. if (start < len && uri[start] == '#') {
  332. std::memcpy(frag_, &uri[start], (len - start) * sizeof(Ch));
  333. frag_[len - start] = '\0';
  334. }
  335. // Re-constitute base_ and uri_
  336. base_ = frag_ + GetFragStringLength() + 1;
  337. SetBase();
  338. uri_ = base_ + GetBaseStringLength() + 1;
  339. SetUri();
  340. }
  341. // Reconstitute base
  342. void SetBase() {
  343. Ch* next = base_;
  344. std::memcpy(next, scheme_, GetSchemeStringLength() * sizeof(Ch));
  345. next+= GetSchemeStringLength();
  346. std::memcpy(next, auth_, GetAuthStringLength() * sizeof(Ch));
  347. next+= GetAuthStringLength();
  348. std::memcpy(next, path_, GetPathStringLength() * sizeof(Ch));
  349. next+= GetPathStringLength();
  350. std::memcpy(next, query_, GetQueryStringLength() * sizeof(Ch));
  351. next+= GetQueryStringLength();
  352. *next = '\0';
  353. }
  354. // Reconstitute uri
  355. void SetUri() {
  356. Ch* next = uri_;
  357. std::memcpy(next, base_, GetBaseStringLength() * sizeof(Ch));
  358. next+= GetBaseStringLength();
  359. std::memcpy(next, frag_, GetFragStringLength() * sizeof(Ch));
  360. next+= GetFragStringLength();
  361. *next = '\0';
  362. }
  363. // Copy a part from one GenericUri to another
  364. // Return the pointer to the next part to be copied to
  365. Ch* CopyPart(Ch* to, Ch* from, std::size_t len) {
  366. RAPIDJSON_ASSERT(to != 0);
  367. RAPIDJSON_ASSERT(from != 0);
  368. std::memcpy(to, from, len * sizeof(Ch));
  369. to[len] = '\0';
  370. Ch* next = to + len + 1;
  371. return next;
  372. }
  373. // Remove . and .. segments from the path_ member.
  374. // https://tools.ietf.org/html/rfc3986
  375. // This is done in place as we are only removing segments.
  376. void RemoveDotSegments() {
  377. std::size_t pathlen = GetPathStringLength();
  378. std::size_t pathpos = 0; // Position in path_
  379. std::size_t newpos = 0; // Position in new path_
  380. // Loop through each segment in original path_
  381. while (pathpos < pathlen) {
  382. // Get next segment, bounded by '/' or end
  383. size_t slashpos = 0;
  384. while ((pathpos + slashpos) < pathlen) {
  385. if (path_[pathpos + slashpos] == '/') break;
  386. slashpos++;
  387. }
  388. // Check for .. and . segments
  389. if (slashpos == 2 && path_[pathpos] == '.' && path_[pathpos + 1] == '.') {
  390. // Backup a .. segment in the new path_
  391. // We expect to find a previously added slash at the end or nothing
  392. RAPIDJSON_ASSERT(newpos == 0 || path_[newpos - 1] == '/');
  393. size_t lastslashpos = newpos;
  394. // Make sure we don't go beyond the start segment
  395. if (lastslashpos > 1) {
  396. // Find the next to last slash and back up to it
  397. lastslashpos--;
  398. while (lastslashpos > 0) {
  399. if (path_[lastslashpos - 1] == '/') break;
  400. lastslashpos--;
  401. }
  402. // Set the new path_ position
  403. newpos = lastslashpos;
  404. }
  405. } else if (slashpos == 1 && path_[pathpos] == '.') {
  406. // Discard . segment, leaves new path_ unchanged
  407. } else {
  408. // Move any other kind of segment to the new path_
  409. RAPIDJSON_ASSERT(newpos <= pathpos);
  410. std::memmove(&path_[newpos], &path_[pathpos], slashpos * sizeof(Ch));
  411. newpos += slashpos;
  412. // Add slash if not at end
  413. if ((pathpos + slashpos) < pathlen) {
  414. path_[newpos] = '/';
  415. newpos++;
  416. }
  417. }
  418. // Move to next segment
  419. pathpos += slashpos + 1;
  420. }
  421. path_[newpos] = '\0';
  422. }
  423. Ch* uri_; // Everything
  424. Ch* base_; // Everything except fragment
  425. Ch* scheme_; // Includes the :
  426. Ch* auth_; // Includes the //
  427. Ch* path_; // Absolute if starts with /
  428. Ch* query_; // Includes the ?
  429. Ch* frag_; // Includes the #
  430. Allocator* allocator_; //!< The current allocator. It is either user-supplied or equal to ownAllocator_.
  431. Allocator* ownAllocator_; //!< Allocator owned by this Uri.
  432. };
  433. //! GenericUri for Value (UTF-8, default allocator).
  434. typedef GenericUri<Value> Uri;
  435. RAPIDJSON_NAMESPACE_END
  436. #if defined(__clang__)
  437. RAPIDJSON_DIAG_POP
  438. #endif
  439. #endif // RAPIDJSON_URI_H_