# makeqstrdata.py 8.9 KB
"""
Process raw qstr file and output qstr data with length, hash and data bytes.
This script works with Python 2.6, 2.7, 3.3 and 3.4.
"""
from __future__ import print_function

import re
import sys
  8. # Python 2/3 compatibility:
  9. # - iterating through bytes is different
  10. # - codepoint2name lives in a different module
  11. import platform
  12. if platform.python_version_tuple()[0] == "2":
  13. bytes_cons = lambda val, enc=None: bytearray(val)
  14. from htmlentitydefs import codepoint2name
  15. elif platform.python_version_tuple()[0] == "3":
  16. bytes_cons = bytes
  17. from html.entities import codepoint2name
  18. # end compatibility code
  19. codepoint2name[ord("-")] = "hyphen"
  20. # add some custom names to map characters that aren't in HTML
  21. codepoint2name[ord(" ")] = "space"
  22. codepoint2name[ord("'")] = "squot"
  23. codepoint2name[ord(",")] = "comma"
  24. codepoint2name[ord(".")] = "dot"
  25. codepoint2name[ord(":")] = "colon"
  26. codepoint2name[ord(";")] = "semicolon"
  27. codepoint2name[ord("/")] = "slash"
  28. codepoint2name[ord("%")] = "percent"
  29. codepoint2name[ord("#")] = "hash"
  30. codepoint2name[ord("(")] = "paren_open"
  31. codepoint2name[ord(")")] = "paren_close"
  32. codepoint2name[ord("[")] = "bracket_open"
  33. codepoint2name[ord("]")] = "bracket_close"
  34. codepoint2name[ord("{")] = "brace_open"
  35. codepoint2name[ord("}")] = "brace_close"
  36. codepoint2name[ord("*")] = "star"
  37. codepoint2name[ord("!")] = "bang"
  38. codepoint2name[ord("\\")] = "backslash"
  39. codepoint2name[ord("+")] = "plus"
  40. codepoint2name[ord("$")] = "dollar"
  41. codepoint2name[ord("=")] = "equals"
  42. codepoint2name[ord("?")] = "question"
  43. codepoint2name[ord("@")] = "at_sign"
  44. codepoint2name[ord("^")] = "caret"
  45. codepoint2name[ord("|")] = "pipe"
  46. codepoint2name[ord("~")] = "tilde"
  47. # static qstrs, should be sorted
  48. static_qstr_list = [
  49. "",
  50. "__dir__", # Put __dir__ after empty qstr for builtin dir() to work
  51. "\n",
  52. " ",
  53. "*",
  54. "/",
  55. "<module>",
  56. "_",
  57. "__call__",
  58. "__class__",
  59. "__delitem__",
  60. "__enter__",
  61. "__exit__",
  62. "__getattr__",
  63. "__getitem__",
  64. "__hash__",
  65. "__init__",
  66. "__int__",
  67. "__iter__",
  68. "__len__",
  69. "__main__",
  70. "__module__",
  71. "__name__",
  72. "__new__",
  73. "__next__",
  74. "__qualname__",
  75. "__repr__",
  76. "__setitem__",
  77. "__str__",
  78. "ArithmeticError",
  79. "AssertionError",
  80. "AttributeError",
  81. "BaseException",
  82. "EOFError",
  83. "Ellipsis",
  84. "Exception",
  85. "GeneratorExit",
  86. "ImportError",
  87. "IndentationError",
  88. "IndexError",
  89. "KeyError",
  90. "KeyboardInterrupt",
  91. "LookupError",
  92. "MemoryError",
  93. "NameError",
  94. "NoneType",
  95. "NotImplementedError",
  96. "OSError",
  97. "OverflowError",
  98. "RuntimeError",
  99. "StopIteration",
  100. "SyntaxError",
  101. "SystemExit",
  102. "TypeError",
  103. "ValueError",
  104. "ZeroDivisionError",
  105. "abs",
  106. "all",
  107. "any",
  108. "append",
  109. "args",
  110. "bool",
  111. "builtins",
  112. "bytearray",
  113. "bytecode",
  114. "bytes",
  115. "callable",
  116. "chr",
  117. "classmethod",
  118. "clear",
  119. "close",
  120. "const",
  121. "copy",
  122. "count",
  123. "dict",
  124. "dir",
  125. "divmod",
  126. "end",
  127. "endswith",
  128. "eval",
  129. "exec",
  130. "extend",
  131. "find",
  132. "format",
  133. "from_bytes",
  134. "get",
  135. "getattr",
  136. "globals",
  137. "hasattr",
  138. "hash",
  139. "id",
  140. "index",
  141. "insert",
  142. "int",
  143. "isalpha",
  144. "isdigit",
  145. "isinstance",
  146. "islower",
  147. "isspace",
  148. "issubclass",
  149. "isupper",
  150. "items",
  151. "iter",
  152. "join",
  153. "key",
  154. "keys",
  155. "len",
  156. "list",
  157. "little",
  158. "locals",
  159. "lower",
  160. "lstrip",
  161. "main",
  162. "map",
  163. "micropython",
  164. "next",
  165. "object",
  166. "open",
  167. "ord",
  168. "pop",
  169. "popitem",
  170. "pow",
  171. "print",
  172. "range",
  173. "read",
  174. "readinto",
  175. "readline",
  176. "remove",
  177. "replace",
  178. "repr",
  179. "reverse",
  180. "rfind",
  181. "rindex",
  182. "round",
  183. "rsplit",
  184. "rstrip",
  185. "self",
  186. "send",
  187. "sep",
  188. "set",
  189. "setattr",
  190. "setdefault",
  191. "sort",
  192. "sorted",
  193. "split",
  194. "start",
  195. "startswith",
  196. "staticmethod",
  197. "step",
  198. "stop",
  199. "str",
  200. "strip",
  201. "sum",
  202. "super",
  203. "throw",
  204. "to_bytes",
  205. "tuple",
  206. "type",
  207. "update",
  208. "upper",
  209. "utf-8",
  210. "value",
  211. "values",
  212. "write",
  213. "zip",
  214. ]
  215. # this must match the equivalent function in qstr.c
  216. def compute_hash(qstr, bytes_hash):
  217. hash = 5381
  218. for b in qstr:
  219. hash = (hash * 33) ^ b
  220. # Make sure that valid hash is never zero, zero means "hash not computed"
  221. return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
  222. def qstr_escape(qst):
  223. def esc_char(m):
  224. c = ord(m.group(0))
  225. try:
  226. name = codepoint2name[c]
  227. except KeyError:
  228. name = "0x%02x" % c
  229. return "_" + name + "_"
  230. return re.sub(r"[^A-Za-z0-9_]", esc_char, qst)
  231. def parse_input_headers(infiles):
  232. qcfgs = {}
  233. qstrs = {}
  234. # add static qstrs
  235. for qstr in static_qstr_list:
  236. # work out the corresponding qstr name
  237. ident = qstr_escape(qstr)
  238. # don't add duplicates
  239. assert ident not in qstrs
  240. # add the qstr to the list, with order number to retain original order in file
  241. order = len(qstrs) - 300000
  242. qstrs[ident] = (order, ident, qstr)
  243. # read the qstrs in from the input files
  244. for infile in infiles:
  245. with open(infile, "rt") as f:
  246. for line in f:
  247. line = line.strip()
  248. # is this a config line?
  249. match = re.match(r"^QCFG\((.+), (.+)\)", line)
  250. if match:
  251. value = match.group(2)
  252. if value[0] == "(" and value[-1] == ")":
  253. # strip parenthesis from config value
  254. value = value[1:-1]
  255. qcfgs[match.group(1)] = value
  256. continue
  257. # is this a QSTR line?
  258. match = re.match(r"^Q\((.*)\)$", line)
  259. if not match:
  260. continue
  261. # get the qstr value
  262. qstr = match.group(1)
  263. # special cases to specify control characters
  264. if qstr == "\\n":
  265. qstr = "\n"
  266. elif qstr == "\\r\\n":
  267. qstr = "\r\n"
  268. # work out the corresponding qstr name
  269. ident = qstr_escape(qstr)
  270. # don't add duplicates
  271. if ident in qstrs:
  272. continue
  273. # add the qstr to the list, with order number to retain original order in file
  274. order = len(qstrs)
  275. # but put special method names like __add__ at the top of list, so
  276. # that their id's fit into a byte
  277. if ident == "":
  278. # Sort empty qstr above all still
  279. order = -200000
  280. elif ident == "__dir__":
  281. # Put __dir__ after empty qstr for builtin dir() to work
  282. order = -190000
  283. elif ident.startswith("__"):
  284. order -= 100000
  285. qstrs[ident] = (order, ident, qstr)
  286. if not qcfgs:
  287. sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
  288. sys.exit(1)
  289. return qcfgs, qstrs
  290. def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
  291. qbytes = bytes_cons(qstr, "utf8")
  292. qlen = len(qbytes)
  293. qhash = compute_hash(qbytes, cfg_bytes_hash)
  294. if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr):
  295. # qstr is all printable ASCII so render it as-is (for easier debugging)
  296. qdata = qstr
  297. else:
  298. # qstr contains non-printable codes so render entire thing as hex pairs
  299. qdata = "".join(("\\x%02x" % b) for b in qbytes)
  300. if qlen >= (1 << (8 * cfg_bytes_len)):
  301. print("qstr is too long:", qstr)
  302. assert False
  303. qlen_str = ("\\x%02x" * cfg_bytes_len) % tuple(
  304. ((qlen >> (8 * i)) & 0xFF) for i in range(cfg_bytes_len)
  305. )
  306. qhash_str = ("\\x%02x" * cfg_bytes_hash) % tuple(
  307. ((qhash >> (8 * i)) & 0xFF) for i in range(cfg_bytes_hash)
  308. )
  309. return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata)
  310. def print_qstr_data(qcfgs, qstrs):
  311. # get config variables
  312. cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
  313. cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])
  314. # print out the starter of the generated C header file
  315. print("// This file was automatically generated by makeqstrdata.py")
  316. print("")
  317. # add NULL qstr with no hash or data
  318. print(
  319. 'QDEF(MP_QSTRnull, (const byte*)"%s%s" "")'
  320. % ("\\x00" * cfg_bytes_hash, "\\x00" * cfg_bytes_len)
  321. )
  322. # go through each qstr and print it out
  323. for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
  324. qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
  325. print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
  326. def do_work(infiles):
  327. qcfgs, qstrs = parse_input_headers(infiles)
  328. print_qstr_data(qcfgs, qstrs)
  329. if __name__ == "__main__":
  330. do_work(sys.argv[1:])