regex.tcc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671
  1. // class template regex -*- C++ -*-
  2. // Copyright (C) 2013-2018 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /**
  21. * @file bits/regex.tcc
  22. * This is an internal header file, included by other library headers.
  23. * Do not attempt to use it directly. @headername{regex}
  24. */
  25. namespace std _GLIBCXX_VISIBILITY(default)
  26. {
  27. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  28. namespace __detail
  29. {
  30. // Result of merging regex_match and regex_search.
  31. //
  32. // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
  33. // the other one if possible, for test purpose).
  34. //
  35. // That __match_mode is true means regex_match, else regex_search.
  36. template<typename _BiIter, typename _Alloc,
  37. typename _CharT, typename _TraitsT,
  38. _RegexExecutorPolicy __policy,
  39. bool __match_mode>
  40. bool
  41. __regex_algo_impl(_BiIter __s,
  42. _BiIter __e,
  43. match_results<_BiIter, _Alloc>& __m,
  44. const basic_regex<_CharT, _TraitsT>& __re,
  45. regex_constants::match_flag_type __flags)
  46. {
  47. if (__re._M_automaton == nullptr)
  48. return false;
  49. typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
  50. __m._M_begin = __s;
  51. __m._M_resize(__re._M_automaton->_M_sub_count());
  52. for (auto& __it : __res)
  53. __it.matched = false;
  54. bool __ret;
  55. if ((__re.flags() & regex_constants::__polynomial)
  56. || (__policy == _RegexExecutorPolicy::_S_alternate
  57. && !__re._M_automaton->_M_has_backref))
  58. {
  59. _Executor<_BiIter, _Alloc, _TraitsT, false>
  60. __executor(__s, __e, __m, __re, __flags);
  61. if (__match_mode)
  62. __ret = __executor._M_match();
  63. else
  64. __ret = __executor._M_search();
  65. }
  66. else
  67. {
  68. _Executor<_BiIter, _Alloc, _TraitsT, true>
  69. __executor(__s, __e, __m, __re, __flags);
  70. if (__match_mode)
  71. __ret = __executor._M_match();
  72. else
  73. __ret = __executor._M_search();
  74. }
  75. if (__ret)
  76. {
  77. for (auto& __it : __res)
  78. if (!__it.matched)
  79. __it.first = __it.second = __e;
  80. auto& __pre = __m._M_prefix();
  81. auto& __suf = __m._M_suffix();
  82. if (__match_mode)
  83. {
  84. __pre.matched = false;
  85. __pre.first = __s;
  86. __pre.second = __s;
  87. __suf.matched = false;
  88. __suf.first = __e;
  89. __suf.second = __e;
  90. }
  91. else
  92. {
  93. __pre.first = __s;
  94. __pre.second = __res[0].first;
  95. __pre.matched = (__pre.first != __pre.second);
  96. __suf.first = __res[0].second;
  97. __suf.second = __e;
  98. __suf.matched = (__suf.first != __suf.second);
  99. }
  100. }
  101. else
  102. {
  103. __m._M_resize(0);
  104. for (auto& __it : __res)
  105. {
  106. __it.matched = false;
  107. __it.first = __it.second = __e;
  108. }
  109. }
  110. return __ret;
  111. }
  112. }
  113. template<typename _Ch_type>
  114. template<typename _Fwd_iter>
  115. typename regex_traits<_Ch_type>::string_type
  116. regex_traits<_Ch_type>::
  117. lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
  118. {
  119. typedef std::ctype<char_type> __ctype_type;
  120. const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
  121. static const char* __collatenames[] =
  122. {
  123. "NUL",
  124. "SOH",
  125. "STX",
  126. "ETX",
  127. "EOT",
  128. "ENQ",
  129. "ACK",
  130. "alert",
  131. "backspace",
  132. "tab",
  133. "newline",
  134. "vertical-tab",
  135. "form-feed",
  136. "carriage-return",
  137. "SO",
  138. "SI",
  139. "DLE",
  140. "DC1",
  141. "DC2",
  142. "DC3",
  143. "DC4",
  144. "NAK",
  145. "SYN",
  146. "ETB",
  147. "CAN",
  148. "EM",
  149. "SUB",
  150. "ESC",
  151. "IS4",
  152. "IS3",
  153. "IS2",
  154. "IS1",
  155. "space",
  156. "exclamation-mark",
  157. "quotation-mark",
  158. "number-sign",
  159. "dollar-sign",
  160. "percent-sign",
  161. "ampersand",
  162. "apostrophe",
  163. "left-parenthesis",
  164. "right-parenthesis",
  165. "asterisk",
  166. "plus-sign",
  167. "comma",
  168. "hyphen",
  169. "period",
  170. "slash",
  171. "zero",
  172. "one",
  173. "two",
  174. "three",
  175. "four",
  176. "five",
  177. "six",
  178. "seven",
  179. "eight",
  180. "nine",
  181. "colon",
  182. "semicolon",
  183. "less-than-sign",
  184. "equals-sign",
  185. "greater-than-sign",
  186. "question-mark",
  187. "commercial-at",
  188. "A",
  189. "B",
  190. "C",
  191. "D",
  192. "E",
  193. "F",
  194. "G",
  195. "H",
  196. "I",
  197. "J",
  198. "K",
  199. "L",
  200. "M",
  201. "N",
  202. "O",
  203. "P",
  204. "Q",
  205. "R",
  206. "S",
  207. "T",
  208. "U",
  209. "V",
  210. "W",
  211. "X",
  212. "Y",
  213. "Z",
  214. "left-square-bracket",
  215. "backslash",
  216. "right-square-bracket",
  217. "circumflex",
  218. "underscore",
  219. "grave-accent",
  220. "a",
  221. "b",
  222. "c",
  223. "d",
  224. "e",
  225. "f",
  226. "g",
  227. "h",
  228. "i",
  229. "j",
  230. "k",
  231. "l",
  232. "m",
  233. "n",
  234. "o",
  235. "p",
  236. "q",
  237. "r",
  238. "s",
  239. "t",
  240. "u",
  241. "v",
  242. "w",
  243. "x",
  244. "y",
  245. "z",
  246. "left-curly-bracket",
  247. "vertical-line",
  248. "right-curly-bracket",
  249. "tilde",
  250. "DEL",
  251. };
  252. string __s;
  253. for (; __first != __last; ++__first)
  254. __s += __fctyp.narrow(*__first, 0);
  255. for (const auto& __it : __collatenames)
  256. if (__s == __it)
  257. return string_type(1, __fctyp.widen(
  258. static_cast<char>(&__it - __collatenames)));
  259. // TODO Add digraph support:
  260. // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
  261. return string_type();
  262. }
  263. template<typename _Ch_type>
  264. template<typename _Fwd_iter>
  265. typename regex_traits<_Ch_type>::char_class_type
  266. regex_traits<_Ch_type>::
  267. lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
  268. {
  269. typedef std::ctype<char_type> __ctype_type;
  270. const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
  271. // Mappings from class name to class mask.
  272. static const pair<const char*, char_class_type> __classnames[] =
  273. {
  274. {"d", ctype_base::digit},
  275. {"w", {ctype_base::alnum, _RegexMask::_S_under}},
  276. {"s", ctype_base::space},
  277. {"alnum", ctype_base::alnum},
  278. {"alpha", ctype_base::alpha},
  279. {"blank", ctype_base::blank},
  280. {"cntrl", ctype_base::cntrl},
  281. {"digit", ctype_base::digit},
  282. {"graph", ctype_base::graph},
  283. {"lower", ctype_base::lower},
  284. {"print", ctype_base::print},
  285. {"punct", ctype_base::punct},
  286. {"space", ctype_base::space},
  287. {"upper", ctype_base::upper},
  288. {"xdigit", ctype_base::xdigit},
  289. };
  290. string __s;
  291. for (; __first != __last; ++__first)
  292. __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
  293. for (const auto& __it : __classnames)
  294. if (__s == __it.first)
  295. {
  296. if (__icase
  297. && ((__it.second
  298. & (ctype_base::lower | ctype_base::upper)) != 0))
  299. return ctype_base::alpha;
  300. return __it.second;
  301. }
  302. return 0;
  303. }
  304. template<typename _Ch_type>
  305. bool
  306. regex_traits<_Ch_type>::
  307. isctype(_Ch_type __c, char_class_type __f) const
  308. {
  309. typedef std::ctype<char_type> __ctype_type;
  310. const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
  311. return __fctyp.is(__f._M_base, __c)
  312. // [[:w:]]
  313. || ((__f._M_extended & _RegexMask::_S_under)
  314. && __c == __fctyp.widen('_'));
  315. }
  316. template<typename _Ch_type>
  317. int
  318. regex_traits<_Ch_type>::
  319. value(_Ch_type __ch, int __radix) const
  320. {
  321. std::basic_istringstream<char_type> __is(string_type(1, __ch));
  322. long __v;
  323. if (__radix == 8)
  324. __is >> std::oct;
  325. else if (__radix == 16)
  326. __is >> std::hex;
  327. __is >> __v;
  328. return __is.fail() ? -1 : __v;
  329. }
  330. template<typename _Bi_iter, typename _Alloc>
  331. template<typename _Out_iter>
  332. _Out_iter match_results<_Bi_iter, _Alloc>::
  333. format(_Out_iter __out,
  334. const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
  335. const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
  336. match_flag_type __flags) const
  337. {
  338. __glibcxx_assert( ready() );
  339. regex_traits<char_type> __traits;
  340. typedef std::ctype<char_type> __ctype_type;
  341. const __ctype_type&
  342. __fctyp(use_facet<__ctype_type>(__traits.getloc()));
  343. auto __output = [&](size_t __idx)
  344. {
  345. auto& __sub = (*this)[__idx];
  346. if (__sub.matched)
  347. __out = std::copy(__sub.first, __sub.second, __out);
  348. };
  349. if (__flags & regex_constants::format_sed)
  350. {
  351. bool __escaping = false;
  352. for (; __fmt_first != __fmt_last; __fmt_first++)
  353. {
  354. if (__escaping)
  355. {
  356. __escaping = false;
  357. if (__fctyp.is(__ctype_type::digit, *__fmt_first))
  358. __output(__traits.value(*__fmt_first, 10));
  359. else
  360. *__out++ = *__fmt_first;
  361. continue;
  362. }
  363. if (*__fmt_first == '\\')
  364. {
  365. __escaping = true;
  366. continue;
  367. }
  368. if (*__fmt_first == '&')
  369. {
  370. __output(0);
  371. continue;
  372. }
  373. *__out++ = *__fmt_first;
  374. }
  375. if (__escaping)
  376. *__out++ = '\\';
  377. }
  378. else
  379. {
  380. while (1)
  381. {
  382. auto __next = std::find(__fmt_first, __fmt_last, '$');
  383. if (__next == __fmt_last)
  384. break;
  385. __out = std::copy(__fmt_first, __next, __out);
  386. auto __eat = [&](char __ch) -> bool
  387. {
  388. if (*__next == __ch)
  389. {
  390. ++__next;
  391. return true;
  392. }
  393. return false;
  394. };
  395. if (++__next == __fmt_last)
  396. *__out++ = '$';
  397. else if (__eat('$'))
  398. *__out++ = '$';
  399. else if (__eat('&'))
  400. __output(0);
  401. else if (__eat('`'))
  402. {
  403. auto& __sub = _M_prefix();
  404. if (__sub.matched)
  405. __out = std::copy(__sub.first, __sub.second, __out);
  406. }
  407. else if (__eat('\''))
  408. {
  409. auto& __sub = _M_suffix();
  410. if (__sub.matched)
  411. __out = std::copy(__sub.first, __sub.second, __out);
  412. }
  413. else if (__fctyp.is(__ctype_type::digit, *__next))
  414. {
  415. long __num = __traits.value(*__next, 10);
  416. if (++__next != __fmt_last
  417. && __fctyp.is(__ctype_type::digit, *__next))
  418. {
  419. __num *= 10;
  420. __num += __traits.value(*__next++, 10);
  421. }
  422. if (0 <= __num && __num < this->size())
  423. __output(__num);
  424. }
  425. else
  426. *__out++ = '$';
  427. __fmt_first = __next;
  428. }
  429. __out = std::copy(__fmt_first, __fmt_last, __out);
  430. }
  431. return __out;
  432. }
  433. template<typename _Out_iter, typename _Bi_iter,
  434. typename _Rx_traits, typename _Ch_type>
  435. _Out_iter
  436. regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
  437. const basic_regex<_Ch_type, _Rx_traits>& __e,
  438. const _Ch_type* __fmt,
  439. regex_constants::match_flag_type __flags)
  440. {
  441. typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
  442. _IterT __i(__first, __last, __e, __flags);
  443. _IterT __end;
  444. if (__i == __end)
  445. {
  446. if (!(__flags & regex_constants::format_no_copy))
  447. __out = std::copy(__first, __last, __out);
  448. }
  449. else
  450. {
  451. sub_match<_Bi_iter> __last;
  452. auto __len = char_traits<_Ch_type>::length(__fmt);
  453. for (; __i != __end; ++__i)
  454. {
  455. if (!(__flags & regex_constants::format_no_copy))
  456. __out = std::copy(__i->prefix().first, __i->prefix().second,
  457. __out);
  458. __out = __i->format(__out, __fmt, __fmt + __len, __flags);
  459. __last = __i->suffix();
  460. if (__flags & regex_constants::format_first_only)
  461. break;
  462. }
  463. if (!(__flags & regex_constants::format_no_copy))
  464. __out = std::copy(__last.first, __last.second, __out);
  465. }
  466. return __out;
  467. }
  468. template<typename _Bi_iter,
  469. typename _Ch_type,
  470. typename _Rx_traits>
  471. bool
  472. regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  473. operator==(const regex_iterator& __rhs) const
  474. {
  475. if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
  476. return true;
  477. return _M_pregex == __rhs._M_pregex
  478. && _M_begin == __rhs._M_begin
  479. && _M_end == __rhs._M_end
  480. && _M_flags == __rhs._M_flags
  481. && _M_match[0] == __rhs._M_match[0];
  482. }
  483. template<typename _Bi_iter,
  484. typename _Ch_type,
  485. typename _Rx_traits>
  486. regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
  487. regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  488. operator++()
  489. {
  490. // In all cases in which the call to regex_search returns true,
  491. // match.prefix().first shall be equal to the previous value of
  492. // match[0].second, and for each index i in the half-open range
  493. // [0, match.size()) for which match[i].matched is true,
  494. // match[i].position() shall return distance(begin, match[i].first).
  495. // [28.12.1.4.5]
  496. if (_M_match[0].matched)
  497. {
  498. auto __start = _M_match[0].second;
  499. auto __prefix_first = _M_match[0].second;
  500. if (_M_match[0].first == _M_match[0].second)
  501. {
  502. if (__start == _M_end)
  503. {
  504. _M_pregex = nullptr;
  505. return *this;
  506. }
  507. else
  508. {
  509. if (regex_search(__start, _M_end, _M_match, *_M_pregex,
  510. _M_flags
  511. | regex_constants::match_not_null
  512. | regex_constants::match_continuous))
  513. {
  514. __glibcxx_assert(_M_match[0].matched);
  515. auto& __prefix = _M_match._M_prefix();
  516. __prefix.first = __prefix_first;
  517. __prefix.matched = __prefix.first != __prefix.second;
  518. // [28.12.1.4.5]
  519. _M_match._M_begin = _M_begin;
  520. return *this;
  521. }
  522. else
  523. ++__start;
  524. }
  525. }
  526. _M_flags |= regex_constants::match_prev_avail;
  527. if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
  528. {
  529. __glibcxx_assert(_M_match[0].matched);
  530. auto& __prefix = _M_match._M_prefix();
  531. __prefix.first = __prefix_first;
  532. __prefix.matched = __prefix.first != __prefix.second;
  533. // [28.12.1.4.5]
  534. _M_match._M_begin = _M_begin;
  535. }
  536. else
  537. _M_pregex = nullptr;
  538. }
  539. return *this;
  540. }
  541. template<typename _Bi_iter,
  542. typename _Ch_type,
  543. typename _Rx_traits>
  544. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
  545. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  546. operator=(const regex_token_iterator& __rhs)
  547. {
  548. _M_position = __rhs._M_position;
  549. _M_subs = __rhs._M_subs;
  550. _M_n = __rhs._M_n;
  551. _M_suffix = __rhs._M_suffix;
  552. _M_has_m1 = __rhs._M_has_m1;
  553. _M_normalize_result();
  554. return *this;
  555. }
  556. template<typename _Bi_iter,
  557. typename _Ch_type,
  558. typename _Rx_traits>
  559. bool
  560. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  561. operator==(const regex_token_iterator& __rhs) const
  562. {
  563. if (_M_end_of_seq() && __rhs._M_end_of_seq())
  564. return true;
  565. if (_M_suffix.matched && __rhs._M_suffix.matched
  566. && _M_suffix == __rhs._M_suffix)
  567. return true;
  568. if (_M_end_of_seq() || _M_suffix.matched
  569. || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
  570. return false;
  571. return _M_position == __rhs._M_position
  572. && _M_n == __rhs._M_n
  573. && _M_subs == __rhs._M_subs;
  574. }
  575. template<typename _Bi_iter,
  576. typename _Ch_type,
  577. typename _Rx_traits>
  578. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
  579. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  580. operator++()
  581. {
  582. _Position __prev = _M_position;
  583. if (_M_suffix.matched)
  584. *this = regex_token_iterator();
  585. else if (_M_n + 1 < _M_subs.size())
  586. {
  587. _M_n++;
  588. _M_result = &_M_current_match();
  589. }
  590. else
  591. {
  592. _M_n = 0;
  593. ++_M_position;
  594. if (_M_position != _Position())
  595. _M_result = &_M_current_match();
  596. else if (_M_has_m1 && __prev->suffix().length() != 0)
  597. {
  598. _M_suffix.matched = true;
  599. _M_suffix.first = __prev->suffix().first;
  600. _M_suffix.second = __prev->suffix().second;
  601. _M_result = &_M_suffix;
  602. }
  603. else
  604. *this = regex_token_iterator();
  605. }
  606. return *this;
  607. }
  608. template<typename _Bi_iter,
  609. typename _Ch_type,
  610. typename _Rx_traits>
  611. void
  612. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  613. _M_init(_Bi_iter __a, _Bi_iter __b)
  614. {
  615. _M_has_m1 = false;
  616. for (auto __it : _M_subs)
  617. if (__it == -1)
  618. {
  619. _M_has_m1 = true;
  620. break;
  621. }
  622. if (_M_position != _Position())
  623. _M_result = &_M_current_match();
  624. else if (_M_has_m1)
  625. {
  626. _M_suffix.matched = true;
  627. _M_suffix.first = __a;
  628. _M_suffix.second = __b;
  629. _M_result = &_M_suffix;
  630. }
  631. else
  632. _M_result = nullptr;
  633. }
  634. _GLIBCXX_END_NAMESPACE_VERSION
  635. } // namespace