modujson.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. /*
  2. * This file is part of the MicroPython project, http://micropython.org/
  3. *
  4. * The MIT License (MIT)
  5. *
  6. * Copyright (c) 2014-2016 Damien P. George
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #include <stdio.h>
  27. #include "py/objlist.h"
  28. #include "py/objstringio.h"
  29. #include "py/parsenum.h"
  30. #include "py/runtime.h"
  31. #include "py/stream.h"
  32. #if MICROPY_PY_UJSON
  33. STATIC mp_obj_t mod_ujson_dump(mp_obj_t obj, mp_obj_t stream) {
  34. if (!MP_OBJ_IS_OBJ(stream)) {
  35. mp_raise_TypeError(NULL);
  36. }
  37. mp_print_t print = {MP_OBJ_TO_PTR(stream), mp_stream_write_adaptor};
  38. mp_obj_print_helper(&print, obj, PRINT_JSON);
  39. return mp_const_none;
  40. }
  41. STATIC MP_DEFINE_CONST_FUN_OBJ_2(mod_ujson_dump_obj, mod_ujson_dump);
  42. STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) {
  43. vstr_t vstr;
  44. mp_print_t print;
  45. vstr_init_print(&vstr, 8, &print);
  46. mp_obj_print_helper(&print, obj, PRINT_JSON);
  47. return mp_obj_new_str_from_vstr(&mp_type_str, &vstr);
  48. }
  49. STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);
  50. // The function below implements a simple non-recursive JSON parser.
  51. //
  52. // The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
  53. // The parser here will parse any valid JSON and return the correct
  54. // corresponding Python object. It allows through a superset of JSON, since
  55. // it treats commas and colons as "whitespace", and doesn't care if
  56. // brackets/braces are correctly paired. It will raise a ValueError if the
  57. // input is outside its specs.
  58. //
  59. // Most of the work is parsing the primitives (null, false, true, numbers,
  60. // strings). It does 1 pass over the input stream. It tries to be fast and
  61. // small in code size, while not using more RAM than necessary.
  62. typedef struct _ujson_stream_t {
  63. mp_obj_t stream_obj;
  64. mp_uint_t (*read)(mp_obj_t obj, void *buf, mp_uint_t size, int *errcode);
  65. int errcode;
  66. byte cur;
  67. } ujson_stream_t;
  68. #define S_EOF (0) // null is not allowed in json stream so is ok as EOF marker
  69. #define S_END(s) ((s).cur == S_EOF)
  70. #define S_CUR(s) ((s).cur)
  71. #define S_NEXT(s) (ujson_stream_next(&(s)))
  72. STATIC byte ujson_stream_next(ujson_stream_t *s) {
  73. mp_uint_t ret = s->read(s->stream_obj, &s->cur, 1, &s->errcode);
  74. if (s->errcode != 0) {
  75. mp_raise_OSError(s->errcode);
  76. }
  77. if (ret == 0) {
  78. s->cur = S_EOF;
  79. }
  80. return s->cur;
  81. }
  82. STATIC mp_obj_t mod_ujson_load(mp_obj_t stream_obj) {
  83. const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ);
  84. ujson_stream_t s = {stream_obj, stream_p->read, 0, 0};
  85. vstr_t vstr;
  86. vstr_init(&vstr, 8);
  87. mp_obj_list_t stack; // we use a list as a simple stack for nested JSON
  88. stack.len = 0;
  89. stack.items = NULL;
  90. mp_obj_t stack_top = MP_OBJ_NULL;
  91. mp_obj_type_t *stack_top_type = NULL;
  92. mp_obj_t stack_key = MP_OBJ_NULL;
  93. S_NEXT(s);
  94. for (;;) {
  95. cont:
  96. if (S_END(s)) {
  97. break;
  98. }
  99. mp_obj_t next = MP_OBJ_NULL;
  100. bool enter = false;
  101. byte cur = S_CUR(s);
  102. S_NEXT(s);
  103. switch (cur) {
  104. case ',':
  105. case ':':
  106. case ' ':
  107. case '\t':
  108. case '\n':
  109. case '\r':
  110. goto cont;
  111. case 'n':
  112. if (S_CUR(s) == 'u' && S_NEXT(s) == 'l' && S_NEXT(s) == 'l') {
  113. S_NEXT(s);
  114. next = mp_const_none;
  115. } else {
  116. goto fail;
  117. }
  118. break;
  119. case 'f':
  120. if (S_CUR(s) == 'a' && S_NEXT(s) == 'l' && S_NEXT(s) == 's' && S_NEXT(s) == 'e') {
  121. S_NEXT(s);
  122. next = mp_const_false;
  123. } else {
  124. goto fail;
  125. }
  126. break;
  127. case 't':
  128. if (S_CUR(s) == 'r' && S_NEXT(s) == 'u' && S_NEXT(s) == 'e') {
  129. S_NEXT(s);
  130. next = mp_const_true;
  131. } else {
  132. goto fail;
  133. }
  134. break;
  135. case '"':
  136. vstr_reset(&vstr);
  137. for (; !S_END(s) && S_CUR(s) != '"';) {
  138. byte c = S_CUR(s);
  139. if (c == '\\') {
  140. c = S_NEXT(s);
  141. switch (c) {
  142. case 'b': c = 0x08; break;
  143. case 'f': c = 0x0c; break;
  144. case 'n': c = 0x0a; break;
  145. case 'r': c = 0x0d; break;
  146. case 't': c = 0x09; break;
  147. case 'u': {
  148. mp_uint_t num = 0;
  149. for (int i = 0; i < 4; i++) {
  150. c = (S_NEXT(s) | 0x20) - '0';
  151. if (c > 9) {
  152. c -= ('a' - ('9' + 1));
  153. }
  154. num = (num << 4) | c;
  155. }
  156. vstr_add_char(&vstr, num);
  157. goto str_cont;
  158. }
  159. }
  160. }
  161. vstr_add_byte(&vstr, c);
  162. str_cont:
  163. S_NEXT(s);
  164. }
  165. if (S_END(s)) {
  166. goto fail;
  167. }
  168. S_NEXT(s);
  169. next = mp_obj_new_str(vstr.buf, vstr.len);
  170. break;
  171. case '-':
  172. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
  173. bool flt = false;
  174. vstr_reset(&vstr);
  175. for (;;) {
  176. vstr_add_byte(&vstr, cur);
  177. cur = S_CUR(s);
  178. if (cur == '.' || cur == 'E' || cur == 'e') {
  179. flt = true;
  180. } else if (cur == '-' || unichar_isdigit(cur)) {
  181. // pass
  182. } else {
  183. break;
  184. }
  185. S_NEXT(s);
  186. }
  187. if (flt) {
  188. next = mp_parse_num_decimal(vstr.buf, vstr.len, false, false, NULL);
  189. } else {
  190. next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL);
  191. }
  192. break;
  193. }
  194. case '[':
  195. next = mp_obj_new_list(0, NULL);
  196. enter = true;
  197. break;
  198. case '{':
  199. next = mp_obj_new_dict(0);
  200. enter = true;
  201. break;
  202. case '}':
  203. case ']': {
  204. if (stack_top == MP_OBJ_NULL) {
  205. // no object at all
  206. goto fail;
  207. }
  208. if (stack.len == 0) {
  209. // finished; compound object
  210. goto success;
  211. }
  212. stack.len -= 1;
  213. stack_top = stack.items[stack.len];
  214. stack_top_type = mp_obj_get_type(stack_top);
  215. goto cont;
  216. }
  217. default:
  218. goto fail;
  219. }
  220. if (stack_top == MP_OBJ_NULL) {
  221. stack_top = next;
  222. stack_top_type = mp_obj_get_type(stack_top);
  223. if (!enter) {
  224. // finished; single primitive only
  225. goto success;
  226. }
  227. } else {
  228. // append to list or dict
  229. if (stack_top_type == &mp_type_list) {
  230. mp_obj_list_append(stack_top, next);
  231. } else {
  232. if (stack_key == MP_OBJ_NULL) {
  233. stack_key = next;
  234. if (enter) {
  235. goto fail;
  236. }
  237. } else {
  238. mp_obj_dict_store(stack_top, stack_key, next);
  239. stack_key = MP_OBJ_NULL;
  240. }
  241. }
  242. if (enter) {
  243. if (stack.items == NULL) {
  244. mp_obj_list_init(&stack, 1);
  245. stack.items[0] = stack_top;
  246. } else {
  247. mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top);
  248. }
  249. stack_top = next;
  250. stack_top_type = mp_obj_get_type(stack_top);
  251. }
  252. }
  253. }
  254. success:
  255. // eat trailing whitespace
  256. while (unichar_isspace(S_CUR(s))) {
  257. S_NEXT(s);
  258. }
  259. if (!S_END(s)) {
  260. // unexpected chars
  261. goto fail;
  262. }
  263. if (stack_top == MP_OBJ_NULL || stack.len != 0) {
  264. // not exactly 1 object
  265. goto fail;
  266. }
  267. vstr_clear(&vstr);
  268. return stack_top;
  269. fail:
  270. mp_raise_ValueError("syntax error in JSON");
  271. }
  272. STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_load_obj, mod_ujson_load);
  273. STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
  274. size_t len;
  275. const char *buf = mp_obj_str_get_data(obj, &len);
  276. vstr_t vstr = {len, len, (char*)buf, true};
  277. mp_obj_stringio_t sio = {{&mp_type_stringio}, &vstr, 0, MP_OBJ_NULL};
  278. return mod_ujson_load(MP_OBJ_FROM_PTR(&sio));
  279. }
  280. STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_loads_obj, mod_ujson_loads);
  281. STATIC const mp_rom_map_elem_t mp_module_ujson_globals_table[] = {
  282. { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_ujson) },
  283. { MP_ROM_QSTR(MP_QSTR_dump), MP_ROM_PTR(&mod_ujson_dump_obj) },
  284. { MP_ROM_QSTR(MP_QSTR_dumps), MP_ROM_PTR(&mod_ujson_dumps_obj) },
  285. { MP_ROM_QSTR(MP_QSTR_load), MP_ROM_PTR(&mod_ujson_load_obj) },
  286. { MP_ROM_QSTR(MP_QSTR_loads), MP_ROM_PTR(&mod_ujson_loads_obj) },
  287. };
  288. STATIC MP_DEFINE_CONST_DICT(mp_module_ujson_globals, mp_module_ujson_globals_table);
  289. const mp_obj_module_t mp_module_ujson = {
  290. .base = { &mp_type_module },
  291. .globals = (mp_obj_dict_t*)&mp_module_ujson_globals,
  292. };
  293. #endif //MICROPY_PY_UJSON