re-api-adapter.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include "BaseObj.h"
  4. #include "TinyObj.h"
  5. #include "re.h"
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include "PikaStdData_List.h"
  10. #include "PikaStdData_Tuple.h"
  11. #include "cre.h"
  12. #include "re_Match.h"
  13. #include "re_Pattern.h"
  14. #if !PIKASCRIPT_VERSION_REQUIRE_MINIMUN(1, 10, 5)
  15. #error PikaScript version 1.10.5 or later is required.
  16. #endif
  17. #define raise_error \
  18. { obj_setErrorCode(self, -__LINE__); }
  19. typedef PikaObj* Any;
  20. void re_Match___init__args(PikaObj* self, char* sub, int* vec, int ven);
  21. int _get_flags(PikaTuple* val);
  22. PikaObj* __split(void* pattern__or__re,
  23. char* subject,
  24. int max_split,
  25. int flags,
  26. int mode_re);
  27. PikaObj* __findall(void* pattern__or__re,
  28. char* subject,
  29. int flags,
  30. int mode_re);
  31. PikaObj* __subn(void* pattern__or__re,
  32. char* repl,
  33. char* subjet,
  34. int count,
  35. int flags,
  36. int mode_re);
  37. void re___init__(PikaObj* self) {
  38. obj_setInt(self, "A", PCRE_ONLY_ASCII);
  39. obj_setInt(self, "I", PCRE_CASELESS);
  40. obj_setInt(self, "M", PCRE_MULTILINE);
  41. obj_setInt(self, "S", PCRE_DOTALL);
  42. obj_setInt(self, "ASCII", PCRE_ONLY_ASCII);
  43. obj_setInt(self, "IGNORECASE", PCRE_CASELESS);
  44. obj_setInt(self, "MULTILINE", PCRE_MULTILINE);
  45. obj_setInt(self, "DOTALL", PCRE_DOTALL);
  46. }
  47. PikaObj* re_findall(PikaObj* self,
  48. char* pattern,
  49. char* subject,
  50. PikaTuple* val) {
  51. int flags = 0;
  52. flags = _get_flags(val);
  53. if (flags < 0) {
  54. obj_setErrorCode(self, __LINE__);
  55. return NULL;
  56. }
  57. Any list = __findall(pattern, subject, flags, 0);
  58. if (!list)
  59. raise_error;
  60. return list;
  61. }
  62. PikaObj* re_match(PikaObj* self, char* pattern, char* subject, PikaTuple* val) {
  63. int flags = 0;
  64. flags = _get_flags(val);
  65. if (flags < 0) {
  66. obj_setErrorCode(self, __LINE__);
  67. return NULL;
  68. }
  69. int ven = -1;
  70. int* vec = pcre_match(pattern, subject, strlen(subject), &ven, flags);
  71. if (!vec) {
  72. if (ven < 0)
  73. obj_setErrorCode(self, -__LINE__);
  74. return NULL;
  75. }
  76. Any m = newNormalObj(New_re_Match);
  77. re_Match___init__args(m, subject, vec, ven);
  78. return m;
  79. }
  80. PikaObj* re_fullmatch(PikaObj* self,
  81. char* pattern,
  82. char* subject,
  83. PikaTuple* val) {
  84. int flags = 0;
  85. flags = _get_flags(val);
  86. if (flags < 0) {
  87. obj_setErrorCode(self, __LINE__);
  88. return NULL;
  89. }
  90. int ven = -1;
  91. int* vec = pcre_fullmatch(pattern, subject, strlen(subject), &ven, flags);
  92. if (!vec) {
  93. if (ven < 0)
  94. obj_setErrorCode(self, -__LINE__);
  95. return NULL;
  96. }
  97. Any m = newNormalObj(New_re_Match);
  98. re_Match___init__args(m, subject, vec, ven);
  99. return m;
  100. }
  101. PikaObj* re_search(PikaObj* self,
  102. char* pattern,
  103. char* subject,
  104. PikaTuple* val) {
  105. int flags = 0;
  106. flags = _get_flags(val);
  107. if (flags < 0) {
  108. obj_setErrorCode(self, __LINE__);
  109. return NULL;
  110. }
  111. int ven = -1;
  112. int* vec = pcre_search(pattern, subject, strlen(subject), &ven, flags);
  113. if (!vec) {
  114. if (ven < 0)
  115. obj_setErrorCode(self, -__LINE__);
  116. return NULL;
  117. }
  118. Any m = newNormalObj(New_re_Match);
  119. re_Match___init__args(m, subject, vec, ven);
  120. return m;
  121. }
  122. char* re_sub(PikaObj* self,
  123. char* pattern,
  124. char* repl,
  125. char* subjet,
  126. PikaTuple* val) {
  127. int flags = PCRE_UTF8;
  128. int count = 0;
  129. int argn = pikaTuple_getSize(val);
  130. if (argn >= 1) {
  131. Arg* arg_i = pikaTuple_getArg(val, 0);
  132. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  133. obj_setErrorCode(self, -__LINE__);
  134. return NULL;
  135. }
  136. count = arg_getInt(arg_i);
  137. }
  138. if (argn >= 2) {
  139. Arg* arg_i = pikaTuple_getArg(val, 1);
  140. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  141. obj_setErrorCode(self, -__LINE__);
  142. return NULL;
  143. }
  144. flags = arg_getInt(arg_i);
  145. if (flags | PCRE_ONLY_ASCII) {
  146. flags &= ~(PCRE_ONLY_ASCII | PCRE_UTF8);
  147. }
  148. }
  149. int length = strlen(subjet);
  150. char* s = pcre_subn(pattern, repl, subjet, length, count, flags, NULL);
  151. if (!s) {
  152. obj_setErrorCode(self, -__LINE__);
  153. return NULL;
  154. }
  155. if (s == subjet) {
  156. obj_setStr(self, "_b", subjet);
  157. return obj_getStr(self, "_b");
  158. }
  159. int len = strlen(s);
  160. char* b = (char*)malloc(len + 1);
  161. if (!b) {
  162. free(s);
  163. return NULL;
  164. }
  165. memcpy(b, s, len);
  166. b[len] = 0;
  167. obj_setStr(self, "_b", b);
  168. free(b);
  169. free(s);
  170. return obj_getStr(self, "_b");
  171. }
  172. PikaObj* re_subn(PikaObj* self,
  173. char* pattern,
  174. char* repl,
  175. char* subjet,
  176. PikaTuple* val) {
  177. int flags = PCRE_UTF8;
  178. int count = 0;
  179. int argn = pikaTuple_getSize(val);
  180. if (argn >= 1) {
  181. Arg* arg_i = pikaTuple_getArg(val, 0);
  182. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  183. obj_setErrorCode(self, -__LINE__);
  184. return NULL;
  185. }
  186. count = arg_getInt(arg_i);
  187. }
  188. if (argn >= 2) {
  189. Arg* arg_i = pikaTuple_getArg(val, 1);
  190. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  191. obj_setErrorCode(self, -__LINE__);
  192. return NULL;
  193. }
  194. flags = arg_getInt(arg_i);
  195. if (flags | PCRE_ONLY_ASCII) {
  196. flags &= ~(PCRE_ONLY_ASCII | PCRE_UTF8);
  197. }
  198. }
  199. Any res = __subn(pattern, repl, subjet, count, flags, 0);
  200. if (!res) {
  201. raise_error;
  202. }
  203. return res;
  204. }
  205. PikaObj* re_compile(PikaObj* self, char* pattern, PikaTuple* val) {
  206. const char* error;
  207. int erroffset;
  208. int flags = _get_flags(val);
  209. if (flags < 0) {
  210. raise_error;
  211. return NULL;
  212. }
  213. pcre* re = pcre_compile(pattern, flags, &error, &erroffset, NULL);
  214. if (!re) {
  215. obj_setErrorCode(self, erroffset);
  216. return NULL;
  217. }
  218. Any m = newNormalObj(New_re_Pattern);
  219. obj_setPtr(m, "_re", re);
  220. return m;
  221. }
  222. PikaObj* re_split(PikaObj* self, char* pattern, char* subject, PikaTuple* val) {
  223. int flags = PCRE_UTF8;
  224. int max_split = 0;
  225. int argn = pikaTuple_getSize(val);
  226. if (argn >= 1) {
  227. Arg* arg_i = pikaTuple_getArg(val, 0);
  228. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  229. obj_setErrorCode(self, -__LINE__);
  230. return NULL;
  231. }
  232. max_split = arg_getInt(arg_i);
  233. }
  234. if (argn >= 2) {
  235. Arg* arg_i = pikaTuple_getArg(val, 1);
  236. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  237. obj_setErrorCode(self, -__LINE__);
  238. return NULL;
  239. }
  240. flags = arg_getInt(arg_i);
  241. if (flags | PCRE_ONLY_ASCII) {
  242. flags &= ~(PCRE_ONLY_ASCII | PCRE_UTF8);
  243. }
  244. }
  245. Any list = __split(pattern, subject, max_split, flags, 0);
  246. if (!list)
  247. raise_error;
  248. return list;
  249. }
  250. char* re_escape(PikaObj* self, char* pattern) {
  251. const char* special_chars = "()[]{}?*+-|^$\\.&~# \t\n\r\v\f";
  252. const int special_chars_len = 25;
  253. if (!pattern)
  254. return NULL;
  255. int n = strlen(pattern);
  256. int after_size = n;
  257. for (int i = 0; i < n; i++) {
  258. for (int j = 0; j < special_chars_len; j++) {
  259. if (pattern[i] != special_chars[j])
  260. continue;
  261. after_size++;
  262. break;
  263. }
  264. }
  265. char* new_s = (char*)malloc(after_size + 1);
  266. if (!new_s)
  267. return NULL;
  268. int at = 0;
  269. while (*pattern) {
  270. char c = *pattern;
  271. int j = 0;
  272. for (; j < special_chars_len; j++) {
  273. if (c != special_chars[j])
  274. continue;
  275. new_s[at++] = '\\';
  276. break;
  277. }
  278. new_s[at++] = c;
  279. pattern++;
  280. }
  281. new_s[at++] = 0;
  282. obj_setStr(self, "_b", new_s);
  283. free(new_s);
  284. return obj_getStr(self, "_b");
  285. }
  286. void re_Match___del__(PikaObj* self) {
  287. void* vec = obj_getPtr(self, "_vec");
  288. if (!vec)
  289. return;
  290. free(vec);
  291. }
  292. void re_Match___init__(PikaObj* self) {
  293. if (!obj_isArgExist(self, "_vec")) {
  294. obj_setPtr(self, "_vec", NULL);
  295. obj_setStr(self, "_b", "");
  296. obj_setInt(self, "_ven", 0);
  297. obj_setStr(self, "_s", "");
  298. }
  299. }
  300. void re_Match___init__args(PikaObj* self, char* sub, int* vec, int ven) {
  301. obj_setPtr(self, "_vec", vec);
  302. obj_setStr(self, "_b", "");
  303. obj_setInt(self, "_ven", ven);
  304. obj_setStr(self, "_s", sub);
  305. }
  306. char* re_Match_group(PikaObj* self, PikaTuple* val) {
  307. int n = 0;
  308. int argn = pikaTuple_getSize(val);
  309. if (argn >= 1) {
  310. Arg* arg_i = pikaTuple_getArg(val, 0);
  311. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  312. obj_setErrorCode(self, -__LINE__);
  313. return NULL;
  314. }
  315. n = arg_getInt(arg_i);
  316. }
  317. int* vec = obj_getPtr(self, "_vec");
  318. if (!vec)
  319. return NULL;
  320. char* s = obj_getStr(self, "_s");
  321. if (!s)
  322. return NULL;
  323. int ven = obj_getInt(self, "_ven");
  324. if (n >= ven || n < 0) {
  325. obj_setErrorCode(self, -__LINE__);
  326. return NULL;
  327. }
  328. int len = vec[n * 2 + 1] - vec[n * 2];
  329. if (!len)
  330. return "";
  331. char* b = (char*)malloc(len + 1);
  332. if (!b)
  333. return NULL;
  334. memcpy(b, s + vec[n * 2], len);
  335. b[len] = 0;
  336. obj_setStr(self, "_b", b);
  337. free(b);
  338. return obj_getStr(self, "_b");
  339. }
  340. PikaObj* re_Match_groups(PikaObj* self) {
  341. int* vec = obj_getPtr(self, "_vec");
  342. if (!vec)
  343. return NULL;
  344. char* s = obj_getStr(self, "_s");
  345. if (!s)
  346. return NULL;
  347. int ven = obj_getInt(self, "_ven");
  348. if (!ven)
  349. return NULL;
  350. PikaTuple* tup_obj = New_pikaTuple();
  351. for (int i = 1; i < ven; i++) {
  352. Arg* str_arg1;
  353. int len = vec[i * 2 + 1] - vec[i * 2];
  354. if (len) {
  355. char* b = (char*)malloc(len + 1);
  356. if (!b)
  357. return NULL;
  358. memcpy(b, s + vec[i * 2], len);
  359. b[len] = 0;
  360. str_arg1 = arg_newStr(b);
  361. free(b);
  362. } else {
  363. str_arg1 = arg_newStr("");
  364. }
  365. pikaList_append(tup_obj, str_arg1);
  366. }
  367. return tup_obj;
  368. }
  369. PikaObj* re_Match_span(PikaObj* self, PikaTuple* val) {
  370. int group_n = 0;
  371. int argn = pikaTuple_getSize(val);
  372. if (argn >= 1) {
  373. Arg* arg_i = pikaTuple_getArg(val, 0);
  374. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  375. obj_setErrorCode(self, -__LINE__);
  376. return NULL;
  377. }
  378. group_n = arg_getInt(arg_i);
  379. }
  380. int* vec = obj_getPtr(self, "_vec");
  381. if (!vec) {
  382. raise_error;
  383. return NULL;
  384. }
  385. int ven = obj_getInt(self, "_ven");
  386. if (!ven || group_n >= ven) {
  387. obj_setErrorCode(self, -__LINE__);
  388. return NULL;
  389. }
  390. // tu_getNew(tu, tu_obj);
  391. PikaTuple* tuple = New_pikaTuple();
  392. pikaList_append(tuple, arg_newInt(vec[group_n * 2]));
  393. pikaList_append(tuple, arg_newInt(vec[group_n * 2 + 1]));
  394. return tuple;
  395. }
  396. void re_Pattern___del__(PikaObj* self) {
  397. void* _re = obj_getPtr(self, "_re");
  398. if (!_re)
  399. return;
  400. pcre* re = (pcre*)_re;
  401. pcre_free(re);
  402. }
  403. void re_Pattern___init__(PikaObj* self) {
  404. if (!obj_isArgExist(self, "_re")) {
  405. obj_setPtr(self, "_re", NULL);
  406. obj_setStr(self, "_b", "");
  407. obj_setInt(self, "_n", -1);
  408. }
  409. }
  410. PikaObj* re_Pattern_findall(PikaObj* self, char* subject, PikaTuple* val) {
  411. int flags = 0;
  412. flags = _get_flags(val);
  413. if (flags < 0) {
  414. obj_setErrorCode(self, __LINE__);
  415. return NULL;
  416. }
  417. if (!obj_isArgExist(self, "_re"))
  418. return NULL;
  419. pcre* re = obj_getPtr(self, "_re");
  420. Any list = __findall(re, subject, flags, 1);
  421. if (!list)
  422. raise_error;
  423. return list;
  424. }
  425. PikaObj* re_Pattern_match(PikaObj* self, char* subject, PikaTuple* val) {
  426. int flags = 0;
  427. flags = _get_flags(val);
  428. if (flags < 0) {
  429. obj_setErrorCode(self, __LINE__);
  430. return NULL;
  431. }
  432. if (!obj_isArgExist(self, "_re"))
  433. return NULL;
  434. pcre* re = obj_getPtr(self, "_re");
  435. int ven = -1;
  436. int* vec = re_match2(re, subject, strlen(subject), &ven, flags);
  437. if (!vec) {
  438. if (ven < 0)
  439. obj_setErrorCode(self, -__LINE__);
  440. return NULL;
  441. }
  442. Any m = newNormalObj(New_re_Match);
  443. re_Match___init__args(m, subject, vec, ven);
  444. return m;
  445. }
  446. PikaObj* re_Pattern_fullmatch(PikaObj* self, char* subject, PikaTuple* val) {
  447. int flags = 0;
  448. flags = _get_flags(val);
  449. if (flags < 0) {
  450. obj_setErrorCode(self, __LINE__);
  451. return NULL;
  452. }
  453. if (!obj_isArgExist(self, "_re"))
  454. return NULL;
  455. pcre* re = obj_getPtr(self, "_re");
  456. int ven = -1;
  457. int* vec = re_fullmatch2(re, subject, strlen(subject), &ven, flags);
  458. if (!vec) {
  459. if (ven < 0)
  460. obj_setErrorCode(self, -__LINE__);
  461. return NULL;
  462. }
  463. Any m = newNormalObj(New_re_Match);
  464. re_Match___init__args(m, subject, vec, ven);
  465. return m;
  466. }
  467. PikaObj* re_Pattern_search(PikaObj* self, char* subject, PikaTuple* val) {
  468. int flags = 0;
  469. flags = _get_flags(val);
  470. if (flags < 0) {
  471. obj_setErrorCode(self, __LINE__);
  472. return NULL;
  473. }
  474. if (!obj_isArgExist(self, "_re"))
  475. return NULL;
  476. pcre* re = obj_getPtr(self, "_re");
  477. Any m = newNormalObj(New_re_Match);
  478. int ven = -1;
  479. int* vec = re_search2(re, subject, strlen(subject), &ven, flags);
  480. if (!vec) {
  481. if (ven < 0)
  482. obj_setErrorCode(self, -__LINE__);
  483. return NULL;
  484. }
  485. re_Match___init__args(m, subject, vec, ven);
  486. return m;
  487. }
  488. char* re_Pattern_sub(PikaObj* self, char* repl, char* subjet, PikaTuple* val) {
  489. int flags = 0;
  490. int count = 0;
  491. int argn = pikaTuple_getSize(val);
  492. if (argn >= 1) {
  493. Arg* arg_i = pikaTuple_getArg(val, 0);
  494. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  495. obj_setErrorCode(self, -__LINE__);
  496. return NULL;
  497. }
  498. count = arg_getInt(arg_i);
  499. }
  500. if (argn >= 2) {
  501. Arg* arg_i = pikaTuple_getArg(val, 1);
  502. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  503. obj_setErrorCode(self, -__LINE__);
  504. return NULL;
  505. }
  506. flags = arg_getInt(arg_i);
  507. if (flags | PCRE_ONLY_ASCII) {
  508. flags &= ~(PCRE_ONLY_ASCII | PCRE_UTF8);
  509. }
  510. }
  511. if (!obj_isArgExist(self, "_re"))
  512. return NULL;
  513. pcre* re = obj_getPtr(self, "_re");
  514. int length = strlen(subjet);
  515. int matched_times = 0;
  516. char* s = re_subn2(re, repl, subjet, length, count, flags, &matched_times);
  517. obj_setInt(self, "_n", matched_times);
  518. if (!s) {
  519. obj_setErrorCode(self, -__LINE__);
  520. return NULL;
  521. }
  522. if (s == subjet) {
  523. obj_setStr(self, "_b", subjet);
  524. return obj_getStr(self, "_b");
  525. }
  526. int len = strlen(s);
  527. char* b = (char*)malloc(len + 1);
  528. if (!b) {
  529. free(s);
  530. return NULL;
  531. }
  532. memcpy(b, s, len);
  533. b[len] = 0;
  534. obj_setStr(self, "_b", b);
  535. free(b);
  536. free(s);
  537. return obj_getStr(self, "_b");
  538. }
  539. PikaObj* re_Pattern_subn(PikaObj* self,
  540. char* repl,
  541. char* subjet,
  542. PikaTuple* val) {
  543. if (!obj_isArgExist(self, "_re"))
  544. return NULL;
  545. int flags = 0;
  546. int count = 0;
  547. int argn = pikaTuple_getSize(val);
  548. if (argn >= 1) {
  549. Arg* arg_i = pikaTuple_getArg(val, 0);
  550. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  551. obj_setErrorCode(self, -__LINE__);
  552. return NULL;
  553. }
  554. count = arg_getInt(arg_i);
  555. }
  556. if (argn >= 2) {
  557. Arg* arg_i = pikaTuple_getArg(val, 1);
  558. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  559. obj_setErrorCode(self, -__LINE__);
  560. return NULL;
  561. }
  562. flags = arg_getInt(arg_i);
  563. if (flags | PCRE_ONLY_ASCII) {
  564. flags &= ~(PCRE_ONLY_ASCII | PCRE_UTF8);
  565. }
  566. }
  567. pcre* re = obj_getPtr(self, "_re");
  568. Any res = __subn(re, repl, subjet, count, flags, 1);
  569. if (!res)
  570. raise_error;
  571. return res;
  572. }
  573. PikaObj* re_Pattern_split(PikaObj* self, char* subject, PikaTuple* val) {
  574. if (!obj_isArgExist(self, "_re"))
  575. return NULL;
  576. pcre* re = obj_getPtr(self, "_re");
  577. int flags = PCRE_UTF8;
  578. int max_split = 0;
  579. int argn = pikaTuple_getSize(val);
  580. if (argn >= 1) {
  581. Arg* arg_i = pikaTuple_getArg(val, 0);
  582. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  583. obj_setErrorCode(self, -__LINE__);
  584. return NULL;
  585. }
  586. max_split = arg_getInt(arg_i);
  587. }
  588. if (argn >= 2) {
  589. Arg* arg_i = pikaTuple_getArg(val, 1);
  590. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  591. obj_setErrorCode(self, -__LINE__);
  592. return NULL;
  593. }
  594. flags = arg_getInt(arg_i);
  595. if (flags | PCRE_ONLY_ASCII) {
  596. flags &= ~(PCRE_ONLY_ASCII | PCRE_UTF8);
  597. }
  598. }
  599. Any list = __split(re, subject, max_split, flags, 1);
  600. if (!list)
  601. raise_error;
  602. return list;
  603. }
  604. int _get_flags(PikaTuple* val) {
  605. int flags = PCRE_UTF8;
  606. int argn = pikaTuple_getSize(val);
  607. if (argn >= 1) {
  608. Arg* arg_i = pikaTuple_getArg(val, 0);
  609. if (arg_getType(arg_i) != ARG_TYPE_INT) {
  610. return -1;
  611. }
  612. flags |= arg_getInt(arg_i);
  613. if (flags & PCRE_ONLY_ASCII) {
  614. flags &= ~(PCRE_ONLY_ASCII | PCRE_UTF8);
  615. }
  616. }
  617. return flags;
  618. }
  619. PikaObj* __split(void* pattern__or__re,
  620. char* subject,
  621. int max_split,
  622. int flags,
  623. int mode_re)
  624. {
  625. int sub_length = strlen(subject);
  626. int j2 = 0;
  627. int _m_n = 0, m_n = 0;
  628. int brackets = -1;
  629. int** vcs;
  630. if (mode_re)
  631. vcs = re_searchall2((pcre*)pattern__or__re, subject, sub_length, &_m_n,
  632. &brackets, flags);
  633. else
  634. vcs = re_searchall((char*)pattern__or__re, subject, sub_length, &_m_n,
  635. &brackets, flags);
  636. m_n = _m_n;
  637. char* b = NULL;
  638. Arg* str_arg1;
  639. // Arg *sub_arg;
  640. if (!vcs) {
  641. return NULL;
  642. }
  643. if (max_split && max_split < m_n)
  644. m_n = max_split;
  645. Any list = newNormalObj(New_PikaStdData_List);
  646. PikaStdData_List___init__(list);
  647. int start = 0;
  648. if (brackets == 1) {
  649. for (int i = 0; i < m_n; i++) {
  650. int* v = vcs[i];
  651. int length = v[0] - start;
  652. if (length) {
  653. b = malloc(length + 1);
  654. if (!b)
  655. goto e_er;
  656. b[length] = 0;
  657. memcpy(b, subject + start, length);
  658. } else {
  659. b = (char*)"";
  660. }
  661. str_arg1 = arg_newStr(b);
  662. PikaStdData_List_append(list, str_arg1);
  663. arg_deinit(str_arg1);
  664. if (length)
  665. free(b);
  666. start = v[1];
  667. }
  668. if (start <= sub_length) {
  669. str_arg1 = arg_newStr(subject + start);
  670. PikaStdData_List_append(list, str_arg1);
  671. arg_deinit(str_arg1);
  672. }
  673. goto exit;
  674. }
  675. for (int i = 0; i < m_n; i++) {
  676. int* v = vcs[i];
  677. int length = v[0] - start;
  678. b = malloc(length + 1);
  679. if (!b)
  680. goto e_er;
  681. memcpy(b, subject + start, length);
  682. b[length] = 0;
  683. str_arg1 = arg_newStr(b);
  684. PikaStdData_List_append(list, str_arg1);
  685. arg_deinit(str_arg1);
  686. for (int j = 1; j < brackets; j++) {
  687. j2 = j * 2;
  688. int length2 = v[j2 + 1] - v[j2];
  689. if (length2 > length) {
  690. free(b);
  691. length = length2;
  692. b = malloc(length + 1);
  693. if (!b)
  694. goto e_er;
  695. }
  696. b[length2] = 0;
  697. memcpy(b, subject + v[j2], length2);
  698. str_arg1 = arg_newStr(b);
  699. PikaStdData_List_append(list, str_arg1);
  700. arg_deinit(str_arg1);
  701. }
  702. start = v[1];
  703. free(b);
  704. }
  705. if (start <= sub_length) {
  706. str_arg1 = arg_newStr(subject + start);
  707. PikaStdData_List_append(list, str_arg1);
  708. arg_deinit(str_arg1);
  709. }
  710. goto exit;
  711. e_er:
  712. if (list) {
  713. obj_deinit(list);
  714. list = NULL;
  715. }
  716. exit:
  717. if (vcs)
  718. re_free_searchall(vcs, _m_n);
  719. return list;
  720. }
  721. PikaObj* __findall(void* pattern__or__re,
  722. char* subject,
  723. int flags,
  724. int mode_re) {
  725. int length = strlen(subject);
  726. int j2 = 0;
  727. int m_n = -1;
  728. int brackets = -1;
  729. int** vcs;
  730. if (mode_re)
  731. vcs = re_searchall2((pcre*)pattern__or__re, subject, length, &m_n,
  732. &brackets, flags);
  733. else
  734. vcs = re_searchall((char*)pattern__or__re, subject, length, &m_n,
  735. &brackets, flags);
  736. char* b = NULL;
  737. if (!vcs) {
  738. if (m_n < 0)
  739. return NULL;
  740. Any list = New_PikaList();
  741. return list;
  742. }
  743. Any list = New_PikaList();
  744. PikaTuple* tu;
  745. if (brackets == 1) {
  746. for (int i = 0; i < m_n; i++) {
  747. int* v = vcs[i];
  748. length = v[1] - v[0];
  749. if (length) {
  750. b = malloc(length + 1);
  751. if (!b)
  752. goto e_er;
  753. b[length] = 0;
  754. memcpy(b, subject + v[0], length);
  755. } else {
  756. b = (char*)"";
  757. }
  758. pikaList_append(list, arg_newStr(b));
  759. if (length)
  760. free(b);
  761. }
  762. goto exit;
  763. }
  764. for (int i = 0; i < m_n; i++) {
  765. int* v = vcs[i];
  766. length = v[1] - v[0];
  767. b = malloc(length + 1);
  768. if (!b)
  769. goto e_er;
  770. tu = New_pikaTuple();
  771. for (int j = 1; j < brackets; j++) {
  772. j2 = j * 2;
  773. length = v[j2 + 1] - v[j2];
  774. b[length] = 0;
  775. memcpy(b, subject + v[j2], length);
  776. pikaList_append(tu, arg_newStr(b));
  777. }
  778. pikaList_append(list, arg_newObj(tu));
  779. free(b);
  780. }
  781. goto exit;
  782. e_er:
  783. if (list) {
  784. obj_deinit(list);
  785. list = NULL;
  786. }
  787. exit:
  788. if (vcs)
  789. re_free_searchall(vcs, m_n);
  790. return list;
  791. }
  792. PikaObj* __subn(void* pattern__or__re,
  793. char* repl,
  794. char* subjet,
  795. int count,
  796. int flags,
  797. int mode_re) {
  798. int length = strlen(subjet);
  799. int matched_times = 0;
  800. char* s;
  801. if (mode_re)
  802. s = re_subn2((pcre*)pattern__or__re, repl, subjet, length, count, flags,
  803. &matched_times);
  804. else
  805. s = pcre_subn((char*)pattern__or__re, repl, subjet, length, count,
  806. flags, &matched_times);
  807. if (!s) {
  808. return NULL;
  809. }
  810. if (s == subjet) {
  811. PikaTuple* yup = New_pikaTuple();
  812. pikaList_append(yup, arg_newStr(s));
  813. pikaList_append(yup, arg_newInt(0));
  814. Any tuple_obj = newNormalObj(New_PikaStdData_Tuple);
  815. obj_setPtr(tuple_obj, "list", yup);
  816. return tuple_obj;
  817. }
  818. PikaTuple* yup = New_pikaTuple();
  819. pikaList_append(yup, arg_newStr(s));
  820. free(s);
  821. pikaList_append(yup, arg_newInt(matched_times));
  822. Any tuple_obj = newNormalObj(New_PikaStdData_Tuple);
  823. obj_setPtr(tuple_obj, "list", yup);
  824. return tuple_obj;
  825. }