PikaStdData_String.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879
  1. #include "PikaStdData_String.h"
  2. #include "PikaStdData_List.h"
  3. #include "PikaStdData_String_Util.h"
  4. #include "dataStrs.h"
  5. char* _strlwr(char *str);
  6. Arg *PikaStdData_String___iter__(PikaObj *self)
  7. {
  8. obj_setInt(self, "__iter_i", 0);
  9. return arg_newRef(self);
  10. }
  11. void PikaStdData_String_set(PikaObj *self, char *s)
  12. {
  13. #if PIKA_STRING_UTF8_ENABLE
  14. int r = _valid_utf8(s, -1);
  15. if (r >= 0)
  16. {
  17. obj_setErrorCode(self, __LINE__);
  18. __platform_printf("Error invaliad character %x\r\n", s[r]);
  19. return;
  20. }
  21. #endif
  22. obj_setStr(self, "str", s);
  23. }
  24. void PikaStdData_String___init__(PikaObj *self, char *s)
  25. {
  26. #if PIKA_STRING_UTF8_ENABLE
  27. int r = _valid_utf8(s, -1);
  28. if (r >= 0)
  29. {
  30. obj_setErrorCode(self, __LINE__);
  31. __platform_printf("Error invaliad character %x\r\n", s[r]);
  32. return;
  33. }
  34. #endif
  35. PikaStdData_String_set(self, s);
  36. }
  37. char *PikaStdData_String_get(PikaObj *self)
  38. {
  39. return obj_getStr(self, "str");
  40. }
  41. Arg *PikaStdData_String___next__(PikaObj *self)
  42. {
  43. int __iter_i = args_getInt(self->list, "__iter_i");
  44. char *str = obj_getStr(self, "str");
  45. uint16_t len = strGetSize(str);
  46. #if PIKA_STRING_UTF8_ENABLE
  47. char char_buff[5];
  48. int r = _utf8_get(str, len, __iter_i, char_buff);
  49. if (r < 0)
  50. {
  51. return arg_newNull();
  52. }
  53. args_setInt(self->list, "__iter_i", __iter_i + 1);
  54. return arg_newStr((char *)char_buff);
  55. #else
  56. Arg *res = NULL;
  57. char char_buff[] = " ";
  58. if (__iter_i < len)
  59. {
  60. char_buff[0] = str[__iter_i];
  61. res = arg_newStr((char *)char_buff);
  62. }
  63. else
  64. {
  65. return arg_newNull();
  66. }
  67. args_setInt(self->list, "__iter_i", __iter_i + 1);
  68. return res;
  69. #endif
  70. }
  71. Arg *PikaStdData_String___getitem__(PikaObj *self, Arg *__key)
  72. {
  73. int key_i = arg_getInt(__key);
  74. if (key_i < 0){
  75. key_i = PikaStdData_String___len__(self) + key_i;
  76. }
  77. char *str = obj_getStr(self, "str");
  78. uint16_t len = strGetSize(str);
  79. #if PIKA_STRING_UTF8_ENABLE
  80. char char_buff[5];
  81. int r = _utf8_get(str, len, key_i, char_buff);
  82. if (r < 0)
  83. {
  84. return arg_newNull();
  85. }
  86. return arg_newStr((char *)char_buff);
  87. #else
  88. char char_buff[] = " ";
  89. if (key_i < len)
  90. {
  91. char_buff[0] = str[key_i];
  92. return arg_newStr((char *)char_buff);
  93. }
  94. else
  95. {
  96. return arg_newNull();
  97. }
  98. #endif
  99. }
  100. void PikaStdData_String___setitem__(PikaObj *self, Arg *__key, Arg *__val)
  101. {
  102. int key_i = arg_getInt(__key);
  103. char *str = obj_getStr(self, "str");
  104. char *val = arg_getStr(__val);
  105. uint16_t len = strGetSize(str);
  106. #if PIKA_STRING_UTF8_ENABLE
  107. int len2 = strlen(val);
  108. int is_invalid = _valid_utf8(val, len2);
  109. if (is_invalid >= 0)
  110. {
  111. obj_setErrorCode(self, __LINE__);
  112. __platform_printf("Error String invalid\r\n");
  113. return;
  114. }
  115. int ulen_val = _utf8_strlen(val, len2);
  116. if (ulen_val != 1)
  117. {
  118. obj_setErrorCode(self, __LINE__);
  119. __platform_printf("Error String invalid char\r\n");
  120. return;
  121. }
  122. int char_len;
  123. int repl_at = _utf8_get_offset(str, len, key_i, &char_len);
  124. if (repl_at < 0)
  125. {
  126. obj_setErrorCode(self, __LINE__);
  127. __platform_printf("Error String Overflow\r\n");
  128. return;
  129. }
  130. int ok = __str_repl(self, str, len, repl_at, char_len, val, len2);
  131. if (ok < 0)
  132. {
  133. obj_setErrorCode(self, __LINE__);
  134. __platform_printf("Error. Internal error(-%d)\r\n", __LINE__);
  135. return;
  136. }
  137. #else
  138. if (key_i >= len)
  139. {
  140. obj_setErrorCode(self, 1);
  141. __platform_printf("Error String Overflow\r\n");
  142. return;
  143. }
  144. str[key_i] = val[0];
  145. #endif
  146. }
  147. char *PikaStdData_String___str__(PikaObj *self)
  148. {
  149. return obj_getStr(self, "str");
  150. }
  151. int PikaStdData_String_startswith(PikaObj *self, char *prefix)
  152. {
  153. char *str = obj_getStr(self, "str");
  154. char *p = prefix;
  155. int i = 0;
  156. while (*p != '\0')
  157. {
  158. if (*p != str[i])
  159. return 0;
  160. p++;
  161. i++;
  162. }
  163. return 1;
  164. }
  165. int PikaStdData_String_endswith(PikaObj *self, char *suffix)
  166. {
  167. char *str = obj_getStr(self, "str");
  168. int len1 = strlen(str);
  169. int len2 = strlen(suffix);
  170. while (len2 >= 1)
  171. {
  172. if (suffix[len2 - 1] != str[len1 - 1])
  173. return 0;
  174. len2--;
  175. len1--;
  176. }
  177. return 1;
  178. }
  179. int PikaStdData_String_isdigit(PikaObj *self)
  180. {
  181. char *str = obj_getStr(self, "str");
  182. int i = 0;
  183. while (str[i] != '\0')
  184. {
  185. if (!isdigit((int)str[i]))
  186. return 0;
  187. i++;
  188. }
  189. return 1;
  190. }
  191. int PikaStdData_String_islower(PikaObj *self)
  192. {
  193. char *str = obj_getStr(self, "str");
  194. int i = 0;
  195. while (str[i] != '\0')
  196. {
  197. if (!islower((int)str[i]))
  198. return 0;
  199. i++;
  200. }
  201. return 1;
  202. }
  203. int PikaStdData_String_isalnum(PikaObj *self)
  204. {
  205. char *str = obj_getStr(self, "str");
  206. int i = 0;
  207. while (str[i] != '\0')
  208. {
  209. if (!isalnum((int)str[i]))
  210. return 0;
  211. i++;
  212. }
  213. return 1;
  214. }
  215. int PikaStdData_String_isalpha(PikaObj *self)
  216. {
  217. char *str = obj_getStr(self, "str");
  218. int i = 0;
  219. while (str[i] != '\0')
  220. {
  221. if (!isalpha((int)str[i]))
  222. return 0;
  223. i++;
  224. }
  225. return 1;
  226. }
  227. int PikaStdData_String_isspace(PikaObj *self)
  228. {
  229. char *str = obj_getStr(self, "str");
  230. int i = 0;
  231. while (str[i] != '\0')
  232. {
  233. if (!isspace((int)str[i]))
  234. return 0;
  235. i++;
  236. }
  237. return 1;
  238. }
  239. PikaObj *PikaStdData_String_split(PikaObj *self, char *s)
  240. {
  241. /* 创建 list 对象 */
  242. PikaObj *list = newNormalObj(New_PikaStdData_List);
  243. /* 初始化 list */
  244. PikaStdData_List___init__(list);
  245. Args buffs = {0};
  246. char *str = strsCopy(&buffs, obj_getStr(self, "str"));
  247. char sign = s[0];
  248. int token_num = strCountSign(str, sign) + 1;
  249. for (int i = 0; i < token_num; i++)
  250. {
  251. char *token = strsPopToken(&buffs, str, sign);
  252. /* 用 arg_set<type> 的 api 创建 arg */
  253. Arg *token_arg = arg_newStr(token);
  254. /* 添加到 list 对象 */
  255. PikaStdData_List_append(list, token_arg);
  256. /* 销毁 arg */
  257. arg_deinit(token_arg);
  258. }
  259. strsDeinit(&buffs);
  260. return list;
  261. }
  262. int PikaStdData_String___len__(PikaObj *self)
  263. {
  264. char *str = obj_getStr(self, "str");
  265. #if PIKA_STRING_UTF8_ENABLE
  266. int n = _utf8_strlen(str, -1);
  267. if (n < 0)
  268. {
  269. obj_setErrorCode(self, __LINE__);
  270. __platform_printf("Error. Internal error(%d)\r\n", __LINE__);
  271. return n;
  272. }
  273. return n;
  274. #else
  275. return strGetSize(str);
  276. #endif
  277. }
  278. char *PikaStdData_String_strip(PikaObj *self)
  279. {
  280. Args buffs = {0};
  281. char *str = strsCopy(&buffs, obj_getStr(self, "str"));
  282. /* strip */
  283. char *str_start = str;
  284. for (size_t i = 0; i < strGetSize(str); i++)
  285. {
  286. if (str[i] != ' ')
  287. {
  288. str_start = (char *)(str + i);
  289. break;
  290. }
  291. }
  292. for (int i = strGetSize(str) - 1; i >= 0; i--)
  293. {
  294. if (str[i] != ' ')
  295. {
  296. str[i + 1] = '\0';
  297. break;
  298. }
  299. }
  300. obj_setStr(self, "_buf", str_start);
  301. strsDeinit(&buffs);
  302. return obj_getStr(self, "_buf");
  303. }
  304. char *PikaStdData_String_replace(PikaObj *self, char *old, char *new)
  305. {
  306. Args buffs = {0};
  307. char *str = strsCopy(&buffs, obj_getStr(self, "str"));
  308. str = strsReplace(&buffs, str, old, new);
  309. obj_setStr(self, "_buf", str);
  310. strsDeinit(&buffs);
  311. return obj_getStr(self, "_buf");
  312. }
  313. Arg *PikaStdData_String_encode(PikaObj *self, PikaTuple *encoding)
  314. {
  315. char *str = obj_getStr(self, "str");
  316. #if PIKA_STRING_UTF8_ENABLE
  317. char *to_code = NULL;
  318. int argn = tuple_getSize(encoding);
  319. if (argn < 1)
  320. {
  321. return arg_newBytes((uint8_t *)str, strGetSize(str));
  322. }
  323. Arg *arg_i = tuple_getArg(encoding, 0);
  324. if (arg_getType(arg_i) != ARG_TYPE_STRING)
  325. {
  326. obj_setErrorCode(self, __LINE__);
  327. __platform_printf("Error invaliad arguments\r\n");
  328. return NULL;
  329. }
  330. to_code = arg_getStr(arg_i);
  331. _strlwr(to_code);
  332. Arg *res = _str_encode(str, to_code);
  333. if (!res)
  334. {
  335. obj_setErrorCode(self, __LINE__);
  336. __platform_printf("Error internal error\r\n");
  337. return NULL;
  338. }
  339. return res;
  340. #else
  341. return arg_newBytes((uint8_t *)str, strGetSize(str));
  342. #endif
  343. }
  344. #if PIKA_STRING_UTF8_ENABLE
  345. static const uint8_t _pcre_utf8_table4[] = {
  346. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  347. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  348. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  349. 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5};
  350. const char mask1 = 0x80;
  351. const char mask2 = 0xc0;
  352. const char mask3 = 0xe0;
  353. const char mask4 = 0xf0;
  354. const char nmask1 = 0x3f;
  355. const char nmask2 = 0x1f;
  356. const char nmask3 = 0x0f;
  357. const char nmask4 = 0x07;
  358. int _valid_utf8(const char *string, int length)
  359. {
  360. const uint8_t *p;
  361. if (length < 0)
  362. {
  363. length = strlen(string);
  364. }
  365. for (p = (const uint8_t *)string; length-- > 0; p++)
  366. {
  367. int ab;
  368. int c = *p;
  369. if (!(c & 0x80))
  370. continue;
  371. if (c < 0xc0)
  372. return (uintptr_t)p - (uintptr_t)string;
  373. ab = _pcre_utf8_table4[c & 0x3f];
  374. if (length < ab || ab > 3)
  375. return (uintptr_t)p - (uintptr_t)string;
  376. length -= ab;
  377. if ((*(++p) & 0xc0) != 0x80)
  378. return (uintptr_t)p - (uintptr_t)string;
  379. switch (ab)
  380. {
  381. case 1:
  382. if ((c & 0x3e) == 0)
  383. return (uintptr_t)p - (uintptr_t)string;
  384. continue;
  385. case 2:
  386. if ((c == 0xe0 && (*p & 0x20) == 0) ||
  387. (c == 0xed && *p >= 0xa0))
  388. return (uintptr_t)p - (uintptr_t)string;
  389. break;
  390. case 3:
  391. if ((c == 0xf0 && (*p & 0x30) == 0) ||
  392. (c > 0xf4) ||
  393. (c == 0xf4 && *p > 0x8f))
  394. return (uintptr_t)p - (uintptr_t)string;
  395. break;
  396. }
  397. while (--ab > 0)
  398. {
  399. if ((*(++p) & 0xc0) != 0x80)
  400. return (uintptr_t)p - (uintptr_t)string;
  401. }
  402. }
  403. return -1;
  404. }
  405. int _utf8_get(const char *string, int length, int at, char *out_buf)
  406. {
  407. const uint8_t *p;
  408. int ab, c;
  409. if (length < 0)
  410. {
  411. length = strlen(string);
  412. }
  413. if (at < 0 || at >= length)
  414. return -1;
  415. for (p = (const uint8_t *)string; length > 0 && at; p++, at--)
  416. {
  417. c = *p;
  418. if (!(c & 0x80))
  419. {
  420. length--;
  421. continue;
  422. }
  423. ab = _pcre_utf8_table4[c & 0x3f];
  424. p += ab++;
  425. length -= ab;
  426. }
  427. if (at || length <= 0)
  428. return -2;
  429. c = *p;
  430. if (!(c & 0x80))
  431. {
  432. *out_buf = c;
  433. out_buf[1] = 0;
  434. return 1;
  435. };
  436. ab = _pcre_utf8_table4[c & 0x3f] + 1;
  437. __platform_memcpy(out_buf, p, ab);
  438. out_buf[ab] = '\0';
  439. return ab;
  440. }
  441. int _utf8_get_offset(const char *string, int length, int at, int *out_char_len)
  442. {
  443. const uint8_t *p;
  444. int ab, c;
  445. if (length < 0)
  446. {
  447. length = strlen(string);
  448. }
  449. if (at < 0 || at >= length)
  450. return -1;
  451. for (p = (const uint8_t *)string; length > 0 && at; p++, at--)
  452. {
  453. c = *p;
  454. if (!(c & 0x80))
  455. {
  456. length--;
  457. continue;
  458. }
  459. ab = _pcre_utf8_table4[c & 0x3f];
  460. p += ab++;
  461. length -= ab;
  462. }
  463. if (at)
  464. return -2;
  465. c = *p;
  466. if (!(c & 0x80))
  467. {
  468. if (out_char_len)
  469. *out_char_len = 1;
  470. return (uintptr_t)p - (uintptr_t)string;
  471. };
  472. ab = _pcre_utf8_table4[c & 0x3f] + 1;
  473. if (out_char_len)
  474. *out_char_len = ab;
  475. return (uintptr_t)p - (uintptr_t)string;
  476. }
  477. int _utf8_strlen(const char *string, int length)
  478. {
  479. const uint8_t *p;
  480. int i, ab, c;
  481. if (length < 0)
  482. {
  483. length = strlen(string);
  484. }
  485. for (i = 0, p = (const uint8_t *)string; length > 0; i++, p++)
  486. {
  487. c = *p;
  488. if (!(c & 0x80))
  489. {
  490. length--;
  491. continue;
  492. }
  493. ab = _pcre_utf8_table4[c & 0x3f];
  494. p += ab++;
  495. length -= ab;
  496. }
  497. if (length < 0)
  498. return -1;
  499. return i;
  500. }
  501. int __str_repl(PikaObj *self, char *str, int str_len, int repl_at, int repl_len, char *val, int val_len)
  502. {
  503. if (val_len > repl_len)
  504. {
  505. str[repl_at] = 0;
  506. Arg *s_new = arg_newStr(str);
  507. if (!s_new)
  508. return -1;
  509. s_new = arg_strAppend(s_new, val);
  510. s_new = arg_strAppend(s_new, str + repl_at + repl_len);
  511. obj_removeArg(self, "str");
  512. int rs = obj_setArg(self, "str", s_new);
  513. arg_deinit(s_new);
  514. if (rs)
  515. return -rs;
  516. return 0;
  517. }
  518. char *s = str + repl_at;
  519. __platform_memcpy(s, val, val_len);
  520. __platform_memmove(s + val_len, s + repl_len, str_len - repl_at - repl_len + 1);
  521. return 0;
  522. }
  523. int __utf8_to_utf32_char_LE(const char *utf8, char *out_buf)
  524. {
  525. char c = *utf8;
  526. if (!(c & mask1))
  527. {
  528. *out_buf = c;
  529. out_buf[1] = 0;
  530. out_buf[2] = 0;
  531. out_buf[3] = 0;
  532. return 1;
  533. }
  534. int left_length = _pcre_utf8_table4[c & 0x3f];
  535. char a, b, d;
  536. switch (left_length)
  537. {
  538. case 1:
  539. a = c & nmask2;
  540. b = utf8[1] & nmask1;
  541. out_buf[0] = b | a << 6;
  542. out_buf[1] = a >> 2;
  543. out_buf[2] = 0;
  544. out_buf[3] = 0;
  545. return 2;
  546. case 2:
  547. a = c & nmask3;
  548. b = utf8[1] & nmask1;
  549. c = utf8[2] & nmask1;
  550. out_buf[0] = c | b << 6;
  551. out_buf[1] = b >> 2 | a << 4;
  552. out_buf[2] = 0;
  553. out_buf[3] = 0;
  554. return 3;
  555. case 3:
  556. a = c & nmask4;
  557. b = utf8[1] & nmask1;
  558. c = utf8[2] & nmask1;
  559. d = utf8[3] & nmask1;
  560. out_buf[0] = d | c << 6;
  561. out_buf[1] = c >> 2 | b << 4;
  562. out_buf[2] = b >> 4 | a << 2;
  563. out_buf[3] = 0;
  564. return 4;
  565. default:
  566. return 0;
  567. }
  568. }
  569. int __utf8_to_utf32_LE_noBOM_get_size(const char *utf8, int len)
  570. {
  571. char *p = (char*) utf8;
  572. char buf[4];
  573. int space_sum = 0;
  574. while (len > 0)
  575. {
  576. int size = __utf8_to_utf32_char_LE(p, buf);
  577. if (!size)
  578. return -1;
  579. p += size;
  580. len -= size;
  581. space_sum++;
  582. }
  583. return space_sum * 4;
  584. }
  585. int __utf8_to_utf32_LE_noBOM(const char *utf8, int len, char *out_buf)
  586. {
  587. char *q = out_buf;
  588. char *p = (char*) utf8;
  589. while (len > 0)
  590. {
  591. int size = __utf8_to_utf32_char_LE(p, q);
  592. if (!size)
  593. return -1;
  594. p += size;
  595. len -= size;
  596. q += 4;
  597. }
  598. return q - out_buf;
  599. }
  600. int __utf8_to_utf32_LE_withBOM(const char *utf8, int len, char *out_buf)
  601. {
  602. int size = __utf8_to_utf32_LE_noBOM(utf8, len, out_buf + 4);
  603. if (size < 0)
  604. {
  605. return size;
  606. }
  607. out_buf[0] = '\xff';
  608. out_buf[1] = '\xfe';
  609. out_buf[2] = 0;
  610. out_buf[3] = 0;
  611. return size + 4;
  612. }
  613. int32_t __utf8_decode(const char *utf8, int left_length)
  614. {
  615. int ucode = -1;
  616. char c = *utf8;
  617. if (!(c & mask1))
  618. {
  619. return c;
  620. }
  621. char a, b, d;
  622. switch (left_length)
  623. {
  624. case 1:
  625. a = c & nmask2;
  626. b = utf8[1] & nmask1;
  627. ucode = b | (a & 0x03) << 6;
  628. ucode |= (a >> 2) << 8;
  629. break;
  630. case 2:
  631. a = c & nmask3;
  632. b = utf8[1] & nmask1;
  633. c = utf8[2] & nmask1;
  634. ucode = c | (b & 0x03) << 6;
  635. ucode |= (b >> 2 | a << 4) << 8;
  636. break;
  637. case 3:
  638. a = c & nmask4;
  639. b = utf8[1] & nmask1;
  640. c = utf8[2] & nmask1;
  641. d = utf8[3] & nmask1;
  642. ucode = d | (c & 0x03) << 6;
  643. ucode |= (c >> 2 | (b & 0x0f) << 4) << 8;
  644. ucode |= (b >> 4 | a << 2) << 16;
  645. break;
  646. default:
  647. return -1;
  648. }
  649. return ucode;
  650. }
  651. int __unicode_to_utf16_char_LE(int32_t u, char *out_buf)
  652. {
  653. if (!(u & 0xffff0000))
  654. {
  655. out_buf[0] = u & 0xff;
  656. out_buf[1] = (u & 0xff00) >> 8;
  657. return 2;
  658. }
  659. int32_t d = u - 0x10000;
  660. int32_t L = d & 0x3ff;
  661. int32_t U = d >> 10;
  662. L = L | 0xdc00;
  663. U = U | 0xd800;
  664. out_buf[0] = U & 0xff;
  665. out_buf[1] = (U & 0xff00) >> 8;
  666. out_buf[2] = L & 0xff;
  667. out_buf[3] = (L & 0xff00) >> 8;
  668. return 4;
  669. }
  670. int __utf8_to_utf16_LE_noBOM(const char *utf8, int len, char *out_buf)
  671. {
  672. char *q = out_buf;
  673. char *p = (char*) utf8;
  674. while (len > 0)
  675. {
  676. char c = *p;
  677. int32_t ucode;
  678. if (!(c & mask1))
  679. {
  680. ucode = c;
  681. p++;
  682. len--;
  683. }
  684. else
  685. {
  686. int left_size = _pcre_utf8_table4[c & 0x3f];
  687. ucode = __utf8_decode(p, left_size++);
  688. if (ucode < 0)
  689. return ucode;
  690. p += left_size;
  691. len -= left_size;
  692. }
  693. int size = __unicode_to_utf16_char_LE(ucode, q);
  694. q += size;
  695. }
  696. return q - out_buf;
  697. }
  698. int __utf8_to_utf16_LE_noBOM_get_size(const char *utf8, int len)
  699. {
  700. char out_buf[4];
  701. char *p = (char*) utf8;
  702. int need_space = 0;
  703. while (len > 0)
  704. {
  705. char c = *p;
  706. int32_t ucode;
  707. if (!(c & mask1))
  708. {
  709. ucode = c;
  710. p++;
  711. len--;
  712. }
  713. else
  714. {
  715. int left_size = _pcre_utf8_table4[c & 0x3f];
  716. ucode = __utf8_decode(p, left_size++);
  717. if (ucode < 0)
  718. return ucode;
  719. p += left_size;
  720. len -= left_size;
  721. }
  722. int size = __unicode_to_utf16_char_LE(ucode, out_buf);
  723. need_space += size;
  724. }
  725. return need_space;
  726. }
  727. int __utf8_to_utf16_LE_withBOM(const char *utf8, int len, char *out_buf)
  728. {
  729. int size = __utf8_to_utf16_LE_noBOM(utf8, len, out_buf + 2);
  730. if (size < 0)
  731. {
  732. return size;
  733. }
  734. out_buf[0] = '\xff';
  735. out_buf[1] = '\xfe';
  736. return size + 2;
  737. }
  738. Arg *_str_encode(char *str, char *encoding)
  739. {
  740. if (strEqu(encoding, "utf-8"))
  741. {
  742. return arg_newBytes((uint8_t *)str, strGetSize(str));
  743. }
  744. int len = strlen(str);
  745. if (strEqu(encoding, "ascii"))
  746. {
  747. int ulen = _utf8_strlen(str, len);
  748. if (ulen == len)
  749. {
  750. return arg_newBytes((uint8_t *)str, strGetSize(str));
  751. }
  752. __platform_printf("Warning there is non-ascii characters\r\n");
  753. char *b = (char *)pikaMalloc(len + 1);
  754. if (!b)
  755. {
  756. return NULL;
  757. }
  758. char *p = str;
  759. char *q = b;
  760. char c = *p++;
  761. while (c)
  762. {
  763. if (!(c & 0x80))
  764. {
  765. *q++ = c;
  766. }
  767. c = *p++;
  768. }
  769. *q = 0;
  770. Arg *arg = arg_newBytes((uint8_t *)b, strGetSize(b));
  771. pikaFree(b, len + 1);
  772. return arg;
  773. }
  774. if (strEqu(encoding, "utf-16"))
  775. {
  776. int size_needed = __utf8_to_utf16_LE_noBOM_get_size(str, len);
  777. if (size_needed <= 0)
  778. {
  779. return NULL;
  780. }
  781. size_needed += 2;
  782. char *b = (char *)pikaMalloc(size_needed);
  783. if (!b)
  784. {
  785. return NULL;
  786. }
  787. int ok = __utf8_to_utf16_LE_withBOM(str, len, b);
  788. if (ok < 0)
  789. {
  790. pikaFree(b, size_needed);
  791. return NULL;
  792. }
  793. Arg *arg = arg_newBytes((uint8_t *)b, size_needed);
  794. pikaFree(b, size_needed);
  795. return arg;
  796. }
  797. if (strEqu(encoding, "utf-32"))
  798. {
  799. int size_needed = __utf8_to_utf32_LE_noBOM_get_size(str, len);
  800. if (size_needed <= 0)
  801. {
  802. return NULL;
  803. }
  804. size_needed += 4;
  805. char *b = (char *)pikaMalloc(size_needed);
  806. if (!b)
  807. {
  808. return NULL;
  809. }
  810. int ok = __utf8_to_utf32_LE_withBOM(str, len, b);
  811. if (ok < 0)
  812. {
  813. pikaFree(b, size_needed);
  814. return NULL;
  815. }
  816. Arg *arg = arg_newBytes((uint8_t *)b, size_needed);
  817. pikaFree(b, size_needed);
  818. return arg;
  819. }
  820. return NULL;
  821. }
  822. char* _strlwr(char *str)
  823. {
  824. int i = 0;
  825. while (str[i] != '\0')
  826. {
  827. str[i] = tolower((int)str[i]);
  828. i++;
  829. }
  830. return str;
  831. }
  832. #endif