/* PikaStdData_String.c */
  1. #include "PikaStdData_String.h"
  2. #include "PikaStdData_List.h"
  3. #include "PikaStdData_String_Util.h"
  4. #include "dataStrs.h"
  5. char* _strlwr(char* str);
  6. static int string_len(char* str);
  7. Arg* PikaStdData_String___iter__(PikaObj* self) {
  8. obj_setInt(self, "__iter_i", 0);
  9. return arg_newRef(self);
  10. }
  11. void PikaStdData_String_set(PikaObj* self, char* s) {
  12. #if PIKA_STRING_UTF8_ENABLE
  13. int r = _valid_utf8(s, -1);
  14. if (r >= 0) {
  15. obj_setErrorCode(self, __LINE__);
  16. __platform_printf("Error invaliad character %x\r\n", s[r]);
  17. return;
  18. }
  19. #endif
  20. obj_setStr(self, "str", s);
  21. }
  22. void PikaStdData_String___init__(PikaObj* self, char* s) {
  23. #if PIKA_STRING_UTF8_ENABLE
  24. int r = _valid_utf8(s, -1);
  25. if (r >= 0) {
  26. obj_setErrorCode(self, __LINE__);
  27. __platform_printf("Error invaliad character %x\r\n", s[r]);
  28. return;
  29. }
  30. #endif
  31. PikaStdData_String_set(self, s);
  32. }
  33. char* PikaStdData_String_get(PikaObj* self) {
  34. return obj_getStr(self, "str");
  35. }
  36. Arg* PikaStdData_String___next__(PikaObj* self) {
  37. int __iter_i = args_getInt(self->list, "__iter_i");
  38. char* str = obj_getStr(self, "str");
  39. uint16_t len = strGetSize(str);
  40. #if PIKA_STRING_UTF8_ENABLE
  41. char char_buff[5];
  42. int r = _utf8_get(str, len, __iter_i, char_buff);
  43. if (r < 0) {
  44. return arg_newNull();
  45. }
  46. args_setInt(self->list, "__iter_i", __iter_i + 1);
  47. return arg_newStr((char*)char_buff);
  48. #else
  49. Arg* res = NULL;
  50. char char_buff[] = " ";
  51. if (__iter_i < len) {
  52. char_buff[0] = str[__iter_i];
  53. res = arg_newStr((char*)char_buff);
  54. } else {
  55. return arg_newNull();
  56. }
  57. args_setInt(self->list, "__iter_i", __iter_i + 1);
  58. return res;
  59. #endif
  60. }
  /**
   * Copy the character at index `key_i` of `str` into `char_buff`.
   *
   * A negative index counts from the end (string_len(str) is added once).
   * An index that is still negative afterwards is rejected; previously the
   * non-UTF8 branch used it directly as an array subscript.
   *
   * @param str       NUL-terminated source text.
   * @param key_i     character index, may be negative.
   * @param char_buff destination; the callers in this file pass a zeroed
   *                  5-byte buffer. The non-UTF8 branch writes only byte 0.
   * @return a negative value on failure; otherwise 0 (non-UTF8 build) or
   *         the value returned by _utf8_get() (UTF-8 build).
   */
  static int _str_get(char* str, int key_i, char* char_buff) {
      /* int instead of uint16_t: do not truncate sizes above 65535. */
      int len = (int)strGetSize(str);
      if (key_i < 0) {
          key_i = string_len(str) + key_i;
      }
      if (key_i < 0) {
          return -1;
      }
  #if PIKA_STRING_UTF8_ENABLE
      return _utf8_get(str, len, key_i, char_buff);
  #else
      if (key_i < len) {
          char_buff[0] = str[key_i];
          return 0;
      }
      return -1;
  #endif
  }
  76. char* string_slice(Args* outBuffs, char* str, int start, int end) {
  77. char* res = args_getBuff(outBuffs, strGetSize(str));
  78. if (start < 0) {
  79. start += string_len(str);
  80. }
  81. if (end < 0) {
  82. end += string_len(str) + 1;
  83. }
  84. for (int i = start; i < end; i++) {
  85. char char_buff[5] = {0};
  86. int r = _str_get(str, i, char_buff);
  87. if (r < 0) {
  88. return NULL;
  89. }
  90. res = strAppend(res, char_buff);
  91. }
  92. return res;
  93. }
  94. Arg* PikaStdData_String___getitem__(PikaObj* self, Arg* __key) {
  95. int key_i = arg_getInt(__key);
  96. char* str = obj_getStr(self, "str");
  97. char char_buff[5] = {0};
  98. int r = _str_get(str, key_i, char_buff);
  99. if (r < 0) {
  100. return arg_newNull();
  101. }
  102. return arg_newStr((char*)char_buff);
  103. }
  104. void PikaStdData_String___setitem__(PikaObj* self, Arg* __key, Arg* __val) {
  105. int key_i = arg_getInt(__key);
  106. char* str = obj_getStr(self, "str");
  107. char* val = arg_getStr(__val);
  108. uint16_t len = strGetSize(str);
  109. #if PIKA_STRING_UTF8_ENABLE
  110. int len2 = strlen(val);
  111. int is_invalid = _valid_utf8(val, len2);
  112. if (is_invalid >= 0) {
  113. obj_setErrorCode(self, __LINE__);
  114. __platform_printf("Error String invalid\r\n");
  115. return;
  116. }
  117. int ulen_val = _utf8_strlen(val, len2);
  118. if (ulen_val != 1) {
  119. obj_setErrorCode(self, __LINE__);
  120. __platform_printf("Error String invalid char\r\n");
  121. return;
  122. }
  123. int char_len;
  124. int repl_at = _utf8_get_offset(str, len, key_i, &char_len);
  125. if (repl_at < 0) {
  126. obj_setErrorCode(self, __LINE__);
  127. __platform_printf("Error String Overflow\r\n");
  128. return;
  129. }
  130. int ok = __str_repl(self, str, len, repl_at, char_len, val, len2);
  131. if (ok < 0) {
  132. obj_setErrorCode(self, __LINE__);
  133. __platform_printf("Error. Internal error(-%d)\r\n", __LINE__);
  134. return;
  135. }
  136. #else
  137. if (key_i >= len) {
  138. obj_setErrorCode(self, 1);
  139. __platform_printf("Error String Overflow\r\n");
  140. return;
  141. }
  142. str[key_i] = val[0];
  143. #endif
  144. }
  145. char* PikaStdData_String___str__(PikaObj* self) {
  146. return obj_getStr(self, "str");
  147. }
  148. int PikaStdData_String_startswith(PikaObj* self, char* prefix) {
  149. char* str = obj_getStr(self, "str");
  150. char* p = prefix;
  151. int i = 0;
  152. while (*p != '\0') {
  153. if (*p != str[i])
  154. return 0;
  155. p++;
  156. i++;
  157. }
  158. return 1;
  159. }
  160. int PikaStdData_String_endswith(PikaObj* self, char* suffix) {
  161. char* str = obj_getStr(self, "str");
  162. int len1 = strlen(str);
  163. int len2 = strlen(suffix);
  164. while (len2 >= 1) {
  165. if (suffix[len2 - 1] != str[len1 - 1])
  166. return 0;
  167. len2--;
  168. len1--;
  169. }
  170. return 1;
  171. }
  172. int PikaStdData_String_isdigit(PikaObj* self) {
  173. char* str = obj_getStr(self, "str");
  174. int i = 0;
  175. while (str[i] != '\0') {
  176. if (!isdigit((int)str[i]))
  177. return 0;
  178. i++;
  179. }
  180. return 1;
  181. }
  182. int PikaStdData_String_islower(PikaObj* self) {
  183. char* str = obj_getStr(self, "str");
  184. int i = 0;
  185. while (str[i] != '\0') {
  186. if (!islower((int)str[i]))
  187. return 0;
  188. i++;
  189. }
  190. return 1;
  191. }
  192. int PikaStdData_String_isalnum(PikaObj* self) {
  193. char* str = obj_getStr(self, "str");
  194. int i = 0;
  195. while (str[i] != '\0') {
  196. if (!isalnum((int)str[i]))
  197. return 0;
  198. i++;
  199. }
  200. return 1;
  201. }
  202. int PikaStdData_String_isalpha(PikaObj* self) {
  203. char* str = obj_getStr(self, "str");
  204. int i = 0;
  205. while (str[i] != '\0') {
  206. if (!isalpha((int)str[i]))
  207. return 0;
  208. i++;
  209. }
  210. return 1;
  211. }
  212. int PikaStdData_String_isspace(PikaObj* self) {
  213. char* str = obj_getStr(self, "str");
  214. int i = 0;
  215. while (str[i] != '\0') {
  216. if (!isspace((int)str[i]))
  217. return 0;
  218. i++;
  219. }
  220. return 1;
  221. }
  222. PikaObj* PikaStdData_String_split(PikaObj* self, char* s) {
  223. /* 创建 list 对象 */
  224. PikaObj* list = newNormalObj(New_PikaStdData_List);
  225. /* 初始化 list */
  226. PikaStdData_List___init__(list);
  227. Args buffs = {0};
  228. char* str = strsCopy(&buffs, obj_getStr(self, "str"));
  229. char sign = s[0];
  230. int token_num = strCountSign(str, sign) + 1;
  231. for (int i = 0; i < token_num; i++) {
  232. char* token = strsPopToken(&buffs, &str, sign);
  233. /* 用 arg_set<type> 的 api 创建 arg */
  234. Arg* token_arg = arg_newStr(token);
  235. /* 添加到 list 对象 */
  236. PikaStdData_List_append(list, token_arg);
  237. /* 销毁 arg */
  238. arg_deinit(token_arg);
  239. }
  240. strsDeinit(&buffs);
  241. return list;
  242. }
  /**
   * Length of `str` in the unit used for indexing: UTF-8 characters (via
   * _utf8_strlen(), which may return a negative value) when
   * PIKA_STRING_UTF8_ENABLE is set, otherwise the value of strGetSize().
   *
   * @param str NUL-terminated text.
   * @return the length, or a negative value reported by _utf8_strlen().
   */
  static int string_len(char* str) {
  #if PIKA_STRING_UTF8_ENABLE
      return _utf8_strlen(str, -1);
  #else
      return strGetSize(str);
  #endif
  }
  251. int PikaStdData_String___len__(PikaObj* self) {
  252. char* str = obj_getStr(self, "str");
  253. int n = string_len(str);
  254. if (n < 0) {
  255. obj_setErrorCode(self, __LINE__);
  256. __platform_printf("Error. Internal error(%d)\r\n", __LINE__);
  257. }
  258. return n;
  259. }
  260. char* PikaStdData_String_strip(PikaObj* self, PikaTuple* chrs) {
  261. Args buffs = {0};
  262. char to_strip = ' ';
  263. if (tuple_getSize(chrs) > 1) {
  264. obj_setErrorCode(self, PIKA_RES_ERR_INVALID_PARAM);
  265. obj_setSysOut(self, "Error. Invalid param");
  266. }
  267. if (tuple_getSize(chrs) == 1) {
  268. char* ch_str = tuple_getStr(chrs, 0);
  269. to_strip = ch_str[0];
  270. }
  271. char* str = strsCopy(&buffs, obj_getStr(self, "str"));
  272. /* strip */
  273. char* str_start = str;
  274. size_t len = strlen(str);
  275. for (size_t i = 0; i < len; i++) {
  276. if (str[i] != to_strip) {
  277. str_start = (char*)(str + i);
  278. break;
  279. }
  280. }
  281. len = strlen(str);
  282. for (int i = len - 1; i >= 0; i--) {
  283. if (str[i] != to_strip) {
  284. str[i + 1] = '\0';
  285. break;
  286. }
  287. }
  288. obj_setStr(self, "_buf", str_start);
  289. strsDeinit(&buffs);
  290. return obj_getStr(self, "_buf");
  291. }
  292. char* PikaStdData_String_replace(PikaObj* self, char* old, char* new) {
  293. Args buffs = {0};
  294. char* str = strsCopy(&buffs, obj_getStr(self, "str"));
  295. str = strsReplace(&buffs, str, old, new);
  296. obj_setStr(self, "_buf", str);
  297. strsDeinit(&buffs);
  298. return obj_getStr(self, "_buf");
  299. }
  300. Arg* PikaStdData_String_encode(PikaObj* self, PikaTuple* encoding) {
  301. char* str = obj_getStr(self, "str");
  302. #if PIKA_STRING_UTF8_ENABLE
  303. char* to_code = NULL;
  304. int argn = tuple_getSize(encoding);
  305. if (argn < 1) {
  306. return arg_newBytes((uint8_t*)str, strGetSize(str));
  307. }
  308. Arg* arg_i = tuple_getArg(encoding, 0);
  309. if (arg_getType(arg_i) != ARG_TYPE_STRING) {
  310. obj_setErrorCode(self, __LINE__);
  311. __platform_printf("Error invaliad arguments\r\n");
  312. return NULL;
  313. }
  314. to_code = arg_getStr(arg_i);
  315. _strlwr(to_code);
  316. Arg* res = _str_encode(str, to_code);
  317. if (!res) {
  318. obj_setErrorCode(self, __LINE__);
  319. __platform_printf("Error internal error\r\n");
  320. return NULL;
  321. }
  322. return res;
  323. #else
  324. return arg_newBytes((uint8_t*)str, strGetSize(str));
  325. #endif
  326. }
  327. #if PIKA_STRING_UTF8_ENABLE
  /*
   * Number of bytes that follow a lead byte, indexed by (lead & 0x3f).
   * One row per 16 entries; the row comments give the lead-byte range a
   * row corresponds to for lead bytes 0xc0-0xff.
   */
  static const uint8_t _pcre_utf8_table4[] = {
      /* 0xc0-0xcf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      /* 0xd0-0xdf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      /* 0xe0-0xef */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      /* 0xf0-0xff */ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5};
  /* Bit patterns marking a multi-byte UTF-8 lead byte (high bits set). */
  const char mask1 = (char)0x80;
  const char mask2 = (char)0xc0;
  const char mask3 = (char)0xe0;
  const char mask4 = (char)0xf0;

  /* Payload-bit masks applied to UTF-8 bytes by the decoders below. */
  const char nmask1 = 0x3f; /* low 6 bits */
  const char nmask2 = 0x1f; /* low 5 bits */
  const char nmask3 = 0x0f; /* low 4 bits */
  const char nmask4 = 0x07; /* low 3 bits */
  340. int _valid_utf8(const char* string, int length) {
  341. const uint8_t* p;
  342. if (length < 0) {
  343. length = strlen(string);
  344. }
  345. for (p = (const uint8_t*)string; length-- > 0; p++) {
  346. int ab;
  347. int c = *p;
  348. if (!(c & 0x80))
  349. continue;
  350. if (c < 0xc0)
  351. return (uintptr_t)p - (uintptr_t)string;
  352. ab = _pcre_utf8_table4[c & 0x3f];
  353. if (length < ab || ab > 3)
  354. return (uintptr_t)p - (uintptr_t)string;
  355. length -= ab;
  356. if ((*(++p) & 0xc0) != 0x80)
  357. return (uintptr_t)p - (uintptr_t)string;
  358. switch (ab) {
  359. case 1:
  360. if ((c & 0x3e) == 0)
  361. return (uintptr_t)p - (uintptr_t)string;
  362. continue;
  363. case 2:
  364. if ((c == 0xe0 && (*p & 0x20) == 0) ||
  365. (c == 0xed && *p >= 0xa0))
  366. return (uintptr_t)p - (uintptr_t)string;
  367. break;
  368. case 3:
  369. if ((c == 0xf0 && (*p & 0x30) == 0) || (c > 0xf4) ||
  370. (c == 0xf4 && *p > 0x8f))
  371. return (uintptr_t)p - (uintptr_t)string;
  372. break;
  373. }
  374. while (--ab > 0) {
  375. if ((*(++p) & 0xc0) != 0x80)
  376. return (uintptr_t)p - (uintptr_t)string;
  377. }
  378. }
  379. return -1;
  380. }
  381. int _utf8_get(const char* string, int length, int at, char* out_buf) {
  382. const uint8_t* p;
  383. int ab, c;
  384. if (length < 0) {
  385. length = strlen(string);
  386. }
  387. if (at < 0 || at >= length)
  388. return -1;
  389. for (p = (const uint8_t*)string; length > 0 && at; p++, at--) {
  390. c = *p;
  391. if (!(c & 0x80)) {
  392. length--;
  393. continue;
  394. }
  395. ab = _pcre_utf8_table4[c & 0x3f];
  396. p += ab++;
  397. length -= ab;
  398. }
  399. if (at || length <= 0)
  400. return -2;
  401. c = *p;
  402. if (!(c & 0x80)) {
  403. *out_buf = c;
  404. out_buf[1] = 0;
  405. return 1;
  406. };
  407. ab = _pcre_utf8_table4[c & 0x3f] + 1;
  408. __platform_memcpy(out_buf, p, ab);
  409. out_buf[ab] = '\0';
  410. return ab;
  411. }
  412. int _utf8_get_offset(const char* string,
  413. int length,
  414. int at,
  415. int* out_char_len) {
  416. const uint8_t* p;
  417. int ab, c;
  418. if (length < 0) {
  419. length = strlen(string);
  420. }
  421. if (at < 0 || at >= length)
  422. return -1;
  423. for (p = (const uint8_t*)string; length > 0 && at; p++, at--) {
  424. c = *p;
  425. if (!(c & 0x80)) {
  426. length--;
  427. continue;
  428. }
  429. ab = _pcre_utf8_table4[c & 0x3f];
  430. p += ab++;
  431. length -= ab;
  432. }
  433. if (at)
  434. return -2;
  435. c = *p;
  436. if (!(c & 0x80)) {
  437. if (out_char_len)
  438. *out_char_len = 1;
  439. return (uintptr_t)p - (uintptr_t)string;
  440. };
  441. ab = _pcre_utf8_table4[c & 0x3f] + 1;
  442. if (out_char_len)
  443. *out_char_len = ab;
  444. return (uintptr_t)p - (uintptr_t)string;
  445. }
  446. int _utf8_strlen(const char* string, int length) {
  447. const uint8_t* p;
  448. int i, ab, c;
  449. if (length < 0) {
  450. length = strlen(string);
  451. }
  452. for (i = 0, p = (const uint8_t*)string; length > 0; i++, p++) {
  453. c = *p;
  454. if (!(c & 0x80)) {
  455. length--;
  456. continue;
  457. }
  458. ab = _pcre_utf8_table4[c & 0x3f];
  459. p += ab++;
  460. length -= ab;
  461. }
  462. if (length < 0)
  463. return -1;
  464. return i;
  465. }
  466. int __str_repl(PikaObj* self,
  467. char* str,
  468. int str_len,
  469. int repl_at,
  470. int repl_len,
  471. char* val,
  472. int val_len) {
  473. if (val_len > repl_len) {
  474. str[repl_at] = 0;
  475. Arg* s_new = arg_newStr(str);
  476. if (!s_new)
  477. return -1;
  478. s_new = arg_strAppend(s_new, val);
  479. s_new = arg_strAppend(s_new, str + repl_at + repl_len);
  480. obj_removeArg(self, "str");
  481. int rs = obj_setArg(self, "str", s_new);
  482. arg_deinit(s_new);
  483. if (rs)
  484. return -rs;
  485. return 0;
  486. }
  487. char* s = str + repl_at;
  488. __platform_memcpy(s, val, val_len);
  489. __platform_memmove(s + val_len, s + repl_len,
  490. str_len - repl_at - repl_len + 1);
  491. return 0;
  492. }
  493. int __utf8_to_utf32_char_LE(const char* utf8, char* out_buf) {
  494. char c = *utf8;
  495. if (!(c & mask1)) {
  496. *out_buf = c;
  497. out_buf[1] = 0;
  498. out_buf[2] = 0;
  499. out_buf[3] = 0;
  500. return 1;
  501. }
  502. int left_length = _pcre_utf8_table4[c & 0x3f];
  503. char a, b, d;
  504. switch (left_length) {
  505. case 1:
  506. a = c & nmask2;
  507. b = utf8[1] & nmask1;
  508. out_buf[0] = b | a << 6;
  509. out_buf[1] = a >> 2;
  510. out_buf[2] = 0;
  511. out_buf[3] = 0;
  512. return 2;
  513. case 2:
  514. a = c & nmask3;
  515. b = utf8[1] & nmask1;
  516. c = utf8[2] & nmask1;
  517. out_buf[0] = c | b << 6;
  518. out_buf[1] = b >> 2 | a << 4;
  519. out_buf[2] = 0;
  520. out_buf[3] = 0;
  521. return 3;
  522. case 3:
  523. a = c & nmask4;
  524. b = utf8[1] & nmask1;
  525. c = utf8[2] & nmask1;
  526. d = utf8[3] & nmask1;
  527. out_buf[0] = d | c << 6;
  528. out_buf[1] = c >> 2 | b << 4;
  529. out_buf[2] = b >> 4 | a << 2;
  530. out_buf[3] = 0;
  531. return 4;
  532. default:
  533. return 0;
  534. }
  535. }
  /**
   * Compute how many bytes the UTF-32 form (without BOM) of a UTF-8 text
   * needs.
   *
   * @param utf8 UTF-8 text.
   * @param len  size of the text in bytes.
   * @return 4 times the number of characters, or -1 when
   *         __utf8_to_utf32_char_LE() rejects a character.
   */
  int __utf8_to_utf32_LE_noBOM_get_size(const char* utf8, int len) {
      char* cursor = (char*)utf8;
      char scratch[4];
      int chars = 0;

      for (; len > 0; chars++) {
          int used = __utf8_to_utf32_char_LE(cursor, scratch);
          if (used == 0) {
              return -1;
          }
          cursor += used;
          len -= used;
      }
      return chars * 4;
  }
  /**
   * Convert a UTF-8 text to little-endian UTF-32 without a BOM.
   *
   * @param utf8    UTF-8 text.
   * @param len     size of the text in bytes.
   * @param out_buf destination; receives 4 bytes per character.
   * @return the number of bytes written, or -1 when
   *         __utf8_to_utf32_char_LE() rejects a character.
   */
  int __utf8_to_utf32_LE_noBOM(const char* utf8, int len, char* out_buf) {
      char* src = (char*)utf8;
      char* dst = out_buf;

      for (; len > 0; dst += 4) {
          int used = __utf8_to_utf32_char_LE(src, dst);
          if (used == 0) {
              return -1;
          }
          src += used;
          len -= used;
      }
      return dst - out_buf;
  }
  /**
   * Convert a UTF-8 text to little-endian UTF-32 preceded by the 4-byte
   * BOM ff fe 00 00.
   *
   * @param utf8    UTF-8 text.
   * @param len     size of the text in bytes.
   * @param out_buf destination; needs 4 bytes more than the BOM-less form.
   * @return the total number of bytes written, or the negative value
   *         returned by __utf8_to_utf32_LE_noBOM() (the BOM is not written
   *         in that case).
   */
  int __utf8_to_utf32_LE_withBOM(const char* utf8, int len, char* out_buf) {
      char* payload = out_buf + 4;
      int written = __utf8_to_utf32_LE_noBOM(utf8, len, payload);
      if (written < 0) {
          return written;
      }
      out_buf[0] = '\xff';
      out_buf[1] = '\xfe';
      out_buf[2] = 0;
      out_buf[3] = 0;
      return 4 + written;
  }
  574. int32_t __utf8_decode(const char* utf8, int left_length) {
  575. int ucode = -1;
  576. char c = *utf8;
  577. if (!(c & mask1)) {
  578. return c;
  579. }
  580. char a, b, d;
  581. switch (left_length) {
  582. case 1:
  583. a = c & nmask2;
  584. b = utf8[1] & nmask1;
  585. ucode = b | (a & 0x03) << 6;
  586. ucode |= (a >> 2) << 8;
  587. break;
  588. case 2:
  589. a = c & nmask3;
  590. b = utf8[1] & nmask1;
  591. c = utf8[2] & nmask1;
  592. ucode = c | (b & 0x03) << 6;
  593. ucode |= (b >> 2 | a << 4) << 8;
  594. break;
  595. case 3:
  596. a = c & nmask4;
  597. b = utf8[1] & nmask1;
  598. c = utf8[2] & nmask1;
  599. d = utf8[3] & nmask1;
  600. ucode = d | (c & 0x03) << 6;
  601. ucode |= (c >> 2 | (b & 0x0f) << 4) << 8;
  602. ucode |= (b >> 4 | a << 2) << 16;
  603. break;
  604. default:
  605. return -1;
  606. }
  607. return ucode;
  608. }
  /**
   * Write code point `u` as little-endian UTF-16.
   *
   * Values whose upper 16 bits are zero are written as one 16-bit unit;
   * everything else is written as a surrogate pair (high unit first).
   *
   * @param u       code point.
   * @param out_buf destination, 2 or 4 bytes are written.
   * @return the number of bytes written (2 or 4).
   */
  int __unicode_to_utf16_char_LE(int32_t u, char* out_buf) {
      if ((u & 0xffff0000) == 0) {
          out_buf[0] = u & 0xff;
          out_buf[1] = (u & 0xff00) >> 8;
          return 2;
      }

      /* split the offset above 0x10000 into two 10-bit halves */
      const int32_t offset = u - 0x10000;
      const int32_t high = (offset >> 10) | 0xd800;
      const int32_t low = (offset & 0x3ff) | 0xdc00;

      out_buf[0] = high & 0xff;
      out_buf[1] = (high & 0xff00) >> 8;
      out_buf[2] = low & 0xff;
      out_buf[3] = (low & 0xff00) >> 8;
      return 4;
  }
  626. int __utf8_to_utf16_LE_noBOM(const char* utf8, int len, char* out_buf) {
  627. char* q = out_buf;
  628. char* p = (char*)utf8;
  629. while (len > 0) {
  630. char c = *p;
  631. int32_t ucode;
  632. if (!(c & mask1)) {
  633. ucode = c;
  634. p++;
  635. len--;
  636. } else {
  637. int left_size = _pcre_utf8_table4[c & 0x3f];
  638. ucode = __utf8_decode(p, left_size++);
  639. if (ucode < 0)
  640. return ucode;
  641. p += left_size;
  642. len -= left_size;
  643. }
  644. int size = __unicode_to_utf16_char_LE(ucode, q);
  645. q += size;
  646. }
  647. return q - out_buf;
  648. }
  649. int __utf8_to_utf16_LE_noBOM_get_size(const char* utf8, int len) {
  650. char out_buf[4];
  651. char* p = (char*)utf8;
  652. int need_space = 0;
  653. while (len > 0) {
  654. char c = *p;
  655. int32_t ucode;
  656. if (!(c & mask1)) {
  657. ucode = c;
  658. p++;
  659. len--;
  660. } else {
  661. int left_size = _pcre_utf8_table4[c & 0x3f];
  662. ucode = __utf8_decode(p, left_size++);
  663. if (ucode < 0)
  664. return ucode;
  665. p += left_size;
  666. len -= left_size;
  667. }
  668. int size = __unicode_to_utf16_char_LE(ucode, out_buf);
  669. need_space += size;
  670. }
  671. return need_space;
  672. }
  /**
   * Convert a UTF-8 text to little-endian UTF-16 preceded by the 2-byte
   * BOM ff fe.
   *
   * @param utf8    UTF-8 text.
   * @param len     size of the text in bytes.
   * @param out_buf destination; needs 2 bytes more than the BOM-less form.
   * @return the total number of bytes written, or the negative value
   *         returned by __utf8_to_utf16_LE_noBOM() (the BOM is not written
   *         in that case).
   */
  int __utf8_to_utf16_LE_withBOM(const char* utf8, int len, char* out_buf) {
      char* payload = out_buf + 2;
      int written = __utf8_to_utf16_LE_noBOM(utf8, len, payload);
      if (written < 0) {
          return written;
      }
      out_buf[0] = '\xff';
      out_buf[1] = '\xfe';
      return 2 + written;
  }
  682. Arg* _str_encode(char* str, char* encoding) {
  683. if (strEqu(encoding, "utf-8")) {
  684. return arg_newBytes((uint8_t*)str, strGetSize(str));
  685. }
  686. int len = strlen(str);
  687. if (strEqu(encoding, "ascii")) {
  688. int ulen = _utf8_strlen(str, len);
  689. if (ulen == len) {
  690. return arg_newBytes((uint8_t*)str, strGetSize(str));
  691. }
  692. __platform_printf("Warning there is non-ascii characters\r\n");
  693. char* b = (char*)pikaMalloc(len + 1);
  694. if (!b) {
  695. return NULL;
  696. }
  697. char* p = str;
  698. char* q = b;
  699. char c = *p++;
  700. while (c) {
  701. if (!(c & 0x80)) {
  702. *q++ = c;
  703. }
  704. c = *p++;
  705. }
  706. *q = 0;
  707. Arg* arg = arg_newBytes((uint8_t*)b, strGetSize(b));
  708. pikaFree(b, len + 1);
  709. return arg;
  710. }
  711. if (strEqu(encoding, "utf-16")) {
  712. int size_needed = __utf8_to_utf16_LE_noBOM_get_size(str, len);
  713. if (size_needed <= 0) {
  714. return NULL;
  715. }
  716. size_needed += 2;
  717. char* b = (char*)pikaMalloc(size_needed);
  718. if (!b) {
  719. return NULL;
  720. }
  721. int ok = __utf8_to_utf16_LE_withBOM(str, len, b);
  722. if (ok < 0) {
  723. pikaFree(b, size_needed);
  724. return NULL;
  725. }
  726. Arg* arg = arg_newBytes((uint8_t*)b, size_needed);
  727. pikaFree(b, size_needed);
  728. return arg;
  729. }
  730. if (strEqu(encoding, "utf-32")) {
  731. int size_needed = __utf8_to_utf32_LE_noBOM_get_size(str, len);
  732. if (size_needed <= 0) {
  733. return NULL;
  734. }
  735. size_needed += 4;
  736. char* b = (char*)pikaMalloc(size_needed);
  737. if (!b) {
  738. return NULL;
  739. }
  740. int ok = __utf8_to_utf32_LE_withBOM(str, len, b);
  741. if (ok < 0) {
  742. pikaFree(b, size_needed);
  743. return NULL;
  744. }
  745. Arg* arg = arg_newBytes((uint8_t*)b, size_needed);
  746. pikaFree(b, size_needed);
  747. return arg;
  748. }
  749. return NULL;
  750. }
  /**
   * Convert `str` to lower case in place using tolower().
   *
   * Each byte is converted to unsigned char before the call: handing
   * tolower() a negative `char` (any byte >= 0x80 where char is signed) is
   * undefined behaviour.
   *
   * @param str NUL-terminated, writable text.
   * @return `str`.
   */
  char* _strlwr(char* str) {
      for (char* p = str; *p != '\0'; p++) {
          *p = (char)tolower((unsigned char)*p);
      }
      return str;
  }
  759. #endif