modure.c 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. /*
  2. * This file is part of the MicroPython project, http://micropython.org/
  3. *
  4. * The MIT License (MIT)
  5. *
  6. * Copyright (c) 2014 Paul Sokolovsky
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #include <stdio.h>
  27. #include <assert.h>
  28. #include <string.h>
  29. #include "py/runtime.h"
  30. #include "py/binary.h"
  31. #include "py/objstr.h"
  32. #include "py/stackctrl.h"
  33. #if MICROPY_PY_URE
  34. #define re1_5_stack_chk() MP_STACK_CHECK()
  35. #include "re1.5/re1.5.h"
  36. #define FLAG_DEBUG 0x1000
  // Compiled regular-expression object: the common object header followed by
  // the re1.5 program descriptor.
  typedef struct _mp_obj_re_t {
      mp_obj_base_t base;
      // Must stay the last member: mod_re_compile allocates this struct with
      // extra trailing bytes sized by re1_5_sizecode().
      ByteProg re;
  } mp_obj_re_t;
  41. typedef struct _mp_obj_match_t {
  42. mp_obj_base_t base;
  43. int num_matches;
  44. mp_obj_t str;
  45. const char *caps[0];
  46. } mp_obj_match_t;
  47. STATIC void match_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
  48. (void)kind;
  49. mp_obj_match_t *self = MP_OBJ_TO_PTR(self_in);
  50. mp_printf(print, "<match num=%d>", self->num_matches);
  51. }
  52. STATIC mp_obj_t match_group(mp_obj_t self_in, mp_obj_t no_in) {
  53. mp_obj_match_t *self = MP_OBJ_TO_PTR(self_in);
  54. mp_int_t no = mp_obj_get_int(no_in);
  55. if (no < 0 || no >= self->num_matches) {
  56. nlr_raise(mp_obj_new_exception_arg1(&mp_type_IndexError, no_in));
  57. }
  58. const char *start = self->caps[no * 2];
  59. if (start == NULL) {
  60. // no match for this group
  61. return mp_const_none;
  62. }
  63. return mp_obj_new_str_of_type(mp_obj_get_type(self->str),
  64. (const byte*)start, self->caps[no * 2 + 1] - start);
  65. }
  66. MP_DEFINE_CONST_FUN_OBJ_2(match_group_obj, match_group);
  // Attribute table for match objects: the only entry is the group() method.
  STATIC const mp_rom_map_elem_t match_locals_dict_table[] = {
      { MP_ROM_QSTR(MP_QSTR_group), MP_ROM_PTR(&match_group_obj) },
  };
  // Constant dict built from the table above; referenced by match_type.
  STATIC MP_DEFINE_CONST_DICT(match_locals_dict, match_locals_dict_table);
  // Type object for match results: named "match", printed by match_print,
  // with its attributes taken from match_locals_dict.
  STATIC const mp_obj_type_t match_type = {
      { &mp_type_type },
      .name = MP_QSTR_match,
      .print = match_print,
      .locals_dict = (void*)&match_locals_dict,
  };
  77. STATIC void re_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
  78. (void)kind;
  79. mp_obj_re_t *self = MP_OBJ_TO_PTR(self_in);
  80. mp_printf(print, "<re %p>", self);
  81. }
  82. STATIC mp_obj_t ure_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
  83. (void)n_args;
  84. mp_obj_re_t *self = MP_OBJ_TO_PTR(args[0]);
  85. Subject subj;
  86. size_t len;
  87. subj.begin = mp_obj_str_get_data(args[1], &len);
  88. subj.end = subj.begin + len;
  89. int caps_num = (self->re.sub + 1) * 2;
  90. mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, char*, caps_num);
  91. // cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char
  92. memset((char*)match->caps, 0, caps_num * sizeof(char*));
  93. int res = re1_5_recursiveloopprog(&self->re, &subj, match->caps, caps_num, is_anchored);
  94. if (res == 0) {
  95. m_del_var(mp_obj_match_t, char*, caps_num, match);
  96. return mp_const_none;
  97. }
  98. match->base.type = &match_type;
  99. match->num_matches = caps_num / 2; // caps_num counts start and end pointers
  100. match->str = args[1];
  101. return MP_OBJ_FROM_PTR(match);
  102. }
  103. STATIC mp_obj_t re_match(size_t n_args, const mp_obj_t *args) {
  104. return ure_exec(true, n_args, args);
  105. }
  106. MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_match_obj, 2, 4, re_match);
  107. STATIC mp_obj_t re_search(size_t n_args, const mp_obj_t *args) {
  108. return ure_exec(false, n_args, args);
  109. }
  110. MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_search_obj, 2, 4, re_search);
  111. STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
  112. mp_obj_re_t *self = MP_OBJ_TO_PTR(args[0]);
  113. Subject subj;
  114. size_t len;
  115. const mp_obj_type_t *str_type = mp_obj_get_type(args[1]);
  116. subj.begin = mp_obj_str_get_data(args[1], &len);
  117. subj.end = subj.begin + len;
  118. int caps_num = (self->re.sub + 1) * 2;
  119. int maxsplit = 0;
  120. if (n_args > 2) {
  121. maxsplit = mp_obj_get_int(args[2]);
  122. }
  123. mp_obj_t retval = mp_obj_new_list(0, NULL);
  124. const char **caps = mp_local_alloc(caps_num * sizeof(char*));
  125. while (true) {
  126. // cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char
  127. memset((char**)caps, 0, caps_num * sizeof(char*));
  128. int res = re1_5_recursiveloopprog(&self->re, &subj, caps, caps_num, false);
  129. // if we didn't have a match, or had an empty match, it's time to stop
  130. if (!res || caps[0] == caps[1]) {
  131. break;
  132. }
  133. mp_obj_t s = mp_obj_new_str_of_type(str_type, (const byte*)subj.begin, caps[0] - subj.begin);
  134. mp_obj_list_append(retval, s);
  135. if (self->re.sub > 0) {
  136. mp_raise_NotImplementedError("Splitting with sub-captures");
  137. }
  138. subj.begin = caps[1];
  139. if (maxsplit > 0 && --maxsplit == 0) {
  140. break;
  141. }
  142. }
  143. // cast is a workaround for a bug in msvc (see above)
  144. mp_local_free((char**)caps);
  145. mp_obj_t s = mp_obj_new_str_of_type(str_type, (const byte*)subj.begin, subj.end - subj.begin);
  146. mp_obj_list_append(retval, s);
  147. return retval;
  148. }
  149. MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_split_obj, 2, 3, re_split);
  // Attribute table for compiled-regex objects: match(), search() and split().
  STATIC const mp_rom_map_elem_t re_locals_dict_table[] = {
      { MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&re_match_obj) },
      { MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&re_search_obj) },
      { MP_ROM_QSTR(MP_QSTR_split), MP_ROM_PTR(&re_split_obj) },
  };
  // Constant dict built from the table above; referenced by re_type.
  STATIC MP_DEFINE_CONST_DICT(re_locals_dict, re_locals_dict_table);
  // Type object for compiled regexes: named "ure", printed by re_print,
  // with its attributes taken from re_locals_dict.
  STATIC const mp_obj_type_t re_type = {
      { &mp_type_type },
      .name = MP_QSTR_ure,
      .print = re_print,
      .locals_dict = (void*)&re_locals_dict,
  };
  162. STATIC mp_obj_t mod_re_compile(size_t n_args, const mp_obj_t *args) {
  163. const char *re_str = mp_obj_str_get_str(args[0]);
  164. int size = re1_5_sizecode(re_str);
  165. if (size == -1) {
  166. goto error;
  167. }
  168. mp_obj_re_t *o = m_new_obj_var(mp_obj_re_t, char, size);
  169. o->base.type = &re_type;
  170. int flags = 0;
  171. if (n_args > 1) {
  172. flags = mp_obj_get_int(args[1]);
  173. }
  174. int error = re1_5_compilecode(&o->re, re_str);
  175. if (error != 0) {
  176. error:
  177. mp_raise_ValueError("Error in regex");
  178. }
  179. if (flags & FLAG_DEBUG) {
  180. re1_5_dumpcode(&o->re);
  181. }
  182. return MP_OBJ_FROM_PTR(o);
  183. }
  184. MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_compile_obj, 1, 2, mod_re_compile);
  185. STATIC mp_obj_t mod_re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
  186. (void)n_args;
  187. mp_obj_t self = mod_re_compile(1, args);
  188. const mp_obj_t args2[] = {self, args[1]};
  189. mp_obj_t match = ure_exec(is_anchored, 2, args2);
  190. return match;
  191. }
  192. STATIC mp_obj_t mod_re_match(size_t n_args, const mp_obj_t *args) {
  193. return mod_re_exec(true, n_args, args);
  194. }
  195. MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_match_obj, 2, 4, mod_re_match);
  196. STATIC mp_obj_t mod_re_search(size_t n_args, const mp_obj_t *args) {
  197. return mod_re_exec(false, n_args, args);
  198. }
  199. MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_search_obj, 2, 4, mod_re_search);
  // Module globals: __name__ ("ure"), the compile/match/search functions and
  // the DEBUG flag constant.
  STATIC const mp_rom_map_elem_t mp_module_re_globals_table[] = {
      { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_ure) },
      { MP_ROM_QSTR(MP_QSTR_compile), MP_ROM_PTR(&mod_re_compile_obj) },
      { MP_ROM_QSTR(MP_QSTR_match), MP_ROM_PTR(&mod_re_match_obj) },
      { MP_ROM_QSTR(MP_QSTR_search), MP_ROM_PTR(&mod_re_search_obj) },
      { MP_ROM_QSTR(MP_QSTR_DEBUG), MP_ROM_INT(FLAG_DEBUG) },
  };
  // Constant dict built from the table above; referenced by mp_module_ure.
  STATIC MP_DEFINE_CONST_DICT(mp_module_re_globals, mp_module_re_globals_table);
  // The module object itself. It is the only definition in this file without
  // STATIC, so it has external linkage; its globals are mp_module_re_globals.
  const mp_obj_module_t mp_module_ure = {
      .base = { &mp_type_module },
      .globals = (mp_obj_dict_t*)&mp_module_re_globals,
  };
  212. // Source files #include'd here to make sure they're compiled in
  213. // only if module is enabled by config setting.
  // Fatal-error hook (presumably invoked by the re1.5 sources included below):
  // asserts that its argument is zero, so any non-zero argument trips the
  // assertion. The parameter is parenthesized so that an expression argument
  // is negated as a whole rather than only its first operand.
  #define re1_5_fatal(x) assert(!(x))
  215. #include "re1.5/compilecode.c"
  216. #include "re1.5/dumpcode.c"
  217. #include "re1.5/recursiveloop.c"
  218. #include "re1.5/charclass.c"
  219. #endif //MICROPY_PY_URE