regex_executor.h 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. // class template regex -*- C++ -*-
  2. // Copyright (C) 2013-2018 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /**
  21. * @file bits/regex_executor.h
  22. * This is an internal header file, included by other library headers.
  23. * Do not attempt to use it directly. @headername{regex}
  24. */
  25. // FIXME convert comments to doxygen format.
  26. namespace std _GLIBCXX_VISIBILITY(default)
  27. {
  28. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  29. namespace __detail
  30. {
  31. /**
  32. * @addtogroup regex-detail
  33. * @{
  34. */
  35. /**
  36. * @brief Takes a regex and an input string and does the matching.
  37. *
  38. * The %_Executor class has two modes: DFS mode and BFS mode, controlled
  39. * by the template parameter %__dfs_mode.
  40. */
  41. template<typename _BiIter, typename _Alloc, typename _TraitsT,
  42. bool __dfs_mode>
  43. class _Executor
  44. {
  45. using __search_mode = integral_constant<bool, __dfs_mode>;
  46. using __dfs = true_type;
  47. using __bfs = false_type;
  48. enum class _Match_mode : unsigned char { _Exact, _Prefix };
  49. public:
  50. typedef typename iterator_traits<_BiIter>::value_type _CharT;
  51. typedef basic_regex<_CharT, _TraitsT> _RegexT;
  52. typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
  53. typedef regex_constants::match_flag_type _FlagT;
  54. typedef typename _TraitsT::char_class_type _ClassT;
  55. typedef _NFA<_TraitsT> _NFAT;
  56. public:
  57. _Executor(_BiIter __begin,
  58. _BiIter __end,
  59. _ResultsVec& __results,
  60. const _RegexT& __re,
  61. _FlagT __flags)
  62. : _M_begin(__begin),
  63. _M_end(__end),
  64. _M_re(__re),
  65. _M_nfa(*__re._M_automaton),
  66. _M_results(__results),
  67. _M_rep_count(_M_nfa.size()),
  68. _M_states(_M_nfa._M_start(), _M_nfa.size()),
  69. _M_flags((__flags & regex_constants::match_prev_avail)
  70. ? (__flags
  71. & ~regex_constants::match_not_bol
  72. & ~regex_constants::match_not_bow)
  73. : __flags)
  74. { }
  75. // Set matched when string exactly matches the pattern.
  76. bool
  77. _M_match()
  78. {
  79. _M_current = _M_begin;
  80. return _M_main(_Match_mode::_Exact);
  81. }
  82. // Set matched when some prefix of the string matches the pattern.
  83. bool
  84. _M_search_from_first()
  85. {
  86. _M_current = _M_begin;
  87. return _M_main(_Match_mode::_Prefix);
  88. }
  89. bool
  90. _M_search();
  91. private:
  92. void
  93. _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
  94. void
  95. _M_handle_repeat(_Match_mode, _StateIdT);
  96. void
  97. _M_handle_subexpr_begin(_Match_mode, _StateIdT);
  98. void
  99. _M_handle_subexpr_end(_Match_mode, _StateIdT);
  100. void
  101. _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
  102. void
  103. _M_handle_line_end_assertion(_Match_mode, _StateIdT);
  104. void
  105. _M_handle_word_boundary(_Match_mode, _StateIdT);
  106. void
  107. _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
  108. void
  109. _M_handle_match(_Match_mode, _StateIdT);
  110. void
  111. _M_handle_backref(_Match_mode, _StateIdT);
  112. void
  113. _M_handle_accept(_Match_mode, _StateIdT);
  114. void
  115. _M_handle_alternative(_Match_mode, _StateIdT);
  116. void
  117. _M_dfs(_Match_mode __match_mode, _StateIdT __start);
  118. bool
  119. _M_main(_Match_mode __match_mode)
  120. { return _M_main_dispatch(__match_mode, __search_mode{}); }
  121. bool
  122. _M_main_dispatch(_Match_mode __match_mode, __dfs);
  123. bool
  124. _M_main_dispatch(_Match_mode __match_mode, __bfs);
  125. bool
  126. _M_is_word(_CharT __ch) const
  127. {
  128. static const _CharT __s[2] = { 'w' };
  129. return _M_re._M_automaton->_M_traits.isctype
  130. (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
  131. }
  132. bool
  133. _M_at_begin() const
  134. {
  135. return _M_current == _M_begin
  136. && !(_M_flags & (regex_constants::match_not_bol
  137. | regex_constants::match_prev_avail));
  138. }
  139. bool
  140. _M_at_end() const
  141. {
  142. return _M_current == _M_end
  143. && !(_M_flags & regex_constants::match_not_eol);
  144. }
  145. bool
  146. _M_word_boundary() const;
  147. bool
  148. _M_lookahead(_StateIdT __next);
  149. // Holds additional information used in BFS-mode.
  150. template<typename _SearchMode, typename _ResultsVec>
  151. struct _State_info;
  152. template<typename _ResultsVec>
  153. struct _State_info<__bfs, _ResultsVec>
  154. {
  155. explicit
  156. _State_info(_StateIdT __start, size_t __n)
  157. : _M_visited_states(new bool[__n]()), _M_start(__start)
  158. { }
  159. bool _M_visited(_StateIdT __i)
  160. {
  161. if (_M_visited_states[__i])
  162. return true;
  163. _M_visited_states[__i] = true;
  164. return false;
  165. }
  166. void _M_queue(_StateIdT __i, const _ResultsVec& __res)
  167. { _M_match_queue.emplace_back(__i, __res); }
  168. // Dummy implementations for BFS mode.
  169. _BiIter* _M_get_sol_pos() { return nullptr; }
  170. // Saves states that need to be considered for the next character.
  171. vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
  172. // Indicates which states are already visited.
  173. unique_ptr<bool[]> _M_visited_states;
  174. // To record current solution.
  175. _StateIdT _M_start;
  176. };
  177. template<typename _ResultsVec>
  178. struct _State_info<__dfs, _ResultsVec>
  179. {
  180. explicit
  181. _State_info(_StateIdT __start, size_t) : _M_start(__start)
  182. { }
  183. // Dummy implementations for DFS mode.
  184. bool _M_visited(_StateIdT) const { return false; }
  185. void _M_queue(_StateIdT, const _ResultsVec&) { }
  186. _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
  187. // To record current solution.
  188. _StateIdT _M_start;
  189. _BiIter _M_sol_pos;
  190. };
  191. public:
  192. _ResultsVec _M_cur_results;
  193. _BiIter _M_current;
  194. _BiIter _M_begin;
  195. const _BiIter _M_end;
  196. const _RegexT& _M_re;
  197. const _NFAT& _M_nfa;
  198. _ResultsVec& _M_results;
  199. vector<pair<_BiIter, int>> _M_rep_count;
  200. _State_info<__search_mode, _ResultsVec> _M_states;
  201. _FlagT _M_flags;
  202. // Do we have a solution so far?
  203. bool _M_has_sol;
  204. };
  205. //@} regex-detail
  206. } // namespace __detail
  207. _GLIBCXX_END_NAMESPACE_VERSION
  208. } // namespace std
  209. #include <bits/regex_executor.tcc>