addr2line.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. #!/usr/bin/env python3
  2. #
  3. # Copyright (C) 2019 Intel Corporation. All rights reserved.
  4. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  5. #
  6. import argparse
  7. import os
  8. from pathlib import Path
  9. import re
  10. import shlex
  11. import subprocess
  12. import sys
  13. """
This tool converts the addresses in a call-stack dump generated by iwasm into line info for a wasm file.
When a wasm file is compiled with debug info, each address can be translated into line info.
  16. For example, there is a call-stack dump:
  17. ```
  18. #00: 0x0a04 - $f18
  19. #01: 0x08e4 - $f11
  20. #02: 0x096f - $f12
  21. #03: 0x01aa - _start
  22. ```
  23. - store the call-stack dump into a file, e.g. call_stack.txt
  24. - run the following command to convert the address into line info:
  25. ```
  26. $ cd test-tools/addr2line
  27. $ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt
  28. ```
The script uses *wasm-objdump* from wabt to locate the Code section and turn each address into a code-section offset, then uses *llvm-dwarfdump* to look up the line info for each address
in the call-stack dump.
  31. - if addresses are not available in the stack trace (i.e. iwasm <= 1.3.2) or iwasm is used in fast interpreter mode,
  32. run the following command to convert the function index into line info (passing the `--no-addr` option):
  33. ```
  34. $ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt --no-addr
  35. ```
  36. The script will use *wasm-objdump* in wabt to get the function names corresponding to function indexes, then use *llvm-dwarfdump* to lookup the line info for each
  37. function index in the call-stack dump.
  38. """
  39. def locate_sourceMappingURL_section(wasm_objdump: Path, wasm_file: Path) -> bool:
  40. """
  41. Figure out if the wasm file has a sourceMappingURL section.
  42. """
  43. cmd = f"{wasm_objdump} -h {wasm_file}"
  44. p = subprocess.run(
  45. shlex.split(cmd),
  46. check=True,
  47. capture_output=True,
  48. text=True,
  49. universal_newlines=True,
  50. )
  51. outputs = p.stdout.split(os.linesep)
  52. for line in outputs:
  53. line = line.strip()
  54. if "sourceMappingURL" in line:
  55. return True
  56. return False
  57. def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
  58. """
  59. Find the start offset of Code section in a wasm file.
  60. if the code section header likes:
  61. Code start=0x0000017c end=0x00004382 (size=0x00004206) count: 47
  62. the start offset is 0x0000017c
  63. """
  64. cmd = f"{wasm_objdump} -h {wasm_file}"
  65. p = subprocess.run(
  66. shlex.split(cmd),
  67. check=True,
  68. capture_output=True,
  69. text=True,
  70. universal_newlines=True,
  71. )
  72. outputs = p.stdout.split(os.linesep)
  73. for line in outputs:
  74. line = line.strip()
  75. if "Code" in line:
  76. return int(line.split()[1].split("=")[1], 16)
  77. return -1
  78. def get_line_info_from_function_addr_dwarf(
  79. dwarf_dump: Path, wasm_file: Path, offset: int
  80. ) -> tuple[str, str, str, str]:
  81. """
  82. Find the location info of a given offset in a wasm file.
  83. """
  84. cmd = f"{dwarf_dump} --lookup={offset} {wasm_file}"
  85. p = subprocess.run(
  86. shlex.split(cmd),
  87. check=False,
  88. capture_output=True,
  89. text=True,
  90. universal_newlines=True,
  91. )
  92. outputs = p.stdout.split(os.linesep)
  93. function_name, function_file = "<unknown>", "unknown"
  94. function_line, function_column = "?", "?"
  95. for line in outputs:
  96. line = line.strip()
  97. if "DW_AT_name" in line:
  98. function_name = get_dwarf_tag_value("DW_AT_name", line)
  99. if "DW_AT_decl_file" in line:
  100. function_file = get_dwarf_tag_value("DW_AT_decl_file", line)
  101. if "Line info" in line:
  102. _, function_line, function_column = parse_line_info(line)
  103. return (function_name, function_file, function_line, function_column)
  104. def get_dwarf_tag_value(tag: str, line: str) -> str:
  105. # Try extracting value as string
  106. STR_PATTERN = rf"{tag}\s+\(\"(.*)\"\)"
  107. m = re.match(STR_PATTERN, line)
  108. if m:
  109. return m.groups()[0]
  110. # Try extracting value as integer
  111. INT_PATTERN = rf"{tag}\s+\((\d+)\)"
  112. m = re.match(INT_PATTERN, line)
  113. return m.groups()[0]
  114. def get_line_info_from_function_name_dwarf(
  115. dwarf_dump: Path, wasm_file: Path, function_name: str
  116. ) -> tuple[str, str, str]:
  117. """
  118. Find the location info of a given function in a wasm file.
  119. """
  120. cmd = f"{dwarf_dump} --name={function_name} {wasm_file}"
  121. p = subprocess.run(
  122. shlex.split(cmd),
  123. check=False,
  124. capture_output=True,
  125. text=True,
  126. universal_newlines=True,
  127. )
  128. outputs = p.stdout.split(os.linesep)
  129. function_name, function_file = "<unknown>", "unknown"
  130. function_line = "?"
  131. for line in outputs:
  132. line = line.strip()
  133. if "DW_AT_name" in line:
  134. function_name = get_dwarf_tag_value("DW_AT_name", line)
  135. if "DW_AT_decl_file" in line:
  136. function_file = get_dwarf_tag_value("DW_AT_decl_file", line)
  137. if "DW_AT_decl_line" in line:
  138. function_line = get_dwarf_tag_value("DW_AT_decl_line", line)
  139. return (function_name, function_file, function_line)
  140. def get_line_info_from_function_addr_sourcemapping(
  141. emsymbolizer: Path, wasm_file: Path, offset: int
  142. ) -> tuple[str, str, str, str]:
  143. """
  144. Find the location info of a given offset in a wasm file which is compiled with emcc.
  145. {emsymbolizer} {wasm_file} {offset of file}
  146. there usually are two lines:
  147. ??
  148. relative path to source file:line:column
  149. """
  150. debug_info_source = wasm_file.with_name(f"{wasm_file.name}.map")
  151. cmd = f"{emsymbolizer} -t code -f {debug_info_source} {wasm_file} {offset}"
  152. p = subprocess.run(
  153. shlex.split(cmd),
  154. check=False,
  155. capture_output=True,
  156. text=True,
  157. universal_newlines=True,
  158. cwd=Path.cwd(),
  159. )
  160. outputs = p.stdout.split(os.linesep)
  161. function_name, function_file = "<unknown>", "unknown"
  162. function_line, function_column = "?", "?"
  163. for line in outputs:
  164. line = line.strip()
  165. if not line:
  166. continue
  167. m = re.match("(.*):(\d+):(\d+)", line)
  168. if m:
  169. function_file, function_line, function_column = m.groups()
  170. continue
  171. else:
  172. # it's always ??, not sure about that
  173. if "??" != line:
  174. function_name = line
  175. return (function_name, function_file, function_line, function_column)
  176. def parse_line_info(line_info: str) -> tuple[str, str, str]:
  177. """
  178. line_info -> [file, line, column]
  179. """
  180. PATTERN = r"Line info: file \'(.+)\', line ([0-9]+), column ([0-9]+)"
  181. m = re.search(PATTERN, line_info)
  182. assert m is not None
  183. file, line, column = m.groups()
  184. return (file, int(line), int(column))
  185. def parse_call_stack_line(line: str) -> tuple[str, str, str]:
  186. """
  187. New format (WAMR > 1.3.2):
  188. #00: 0x0a04 - $f18 => (00, 0x0a04, $f18)
  189. Old format:
  190. #00 $f18 => (00, _, $f18)
  191. Text format (-DWAMR_BUILD_LOAD_CUSTOM_SECTION=1 -DWAMR_BUILD_CUSTOM_NAME_SECTION=1):
  192. #02: 0x0200 - a => (02, 0x0200, a)
  193. _start (always):
  194. #05: 0x011f - _start => (05, 0x011f, _start)
  195. """
  196. # New format and Text format and _start
  197. PATTERN = r"#([0-9]+): 0x([0-9a-f]+) - (\S+)"
  198. m = re.match(PATTERN, line)
  199. if m is not None:
  200. return m.groups()
  201. # Old format
  202. PATTERN = r"#([0-9]+) (\S+)"
  203. m = re.match(PATTERN, line)
  204. if m is not None:
  205. return (m.groups()[0], None, m.groups()[1])
  206. return None
  207. def parse_module_functions(wasm_objdump: Path, wasm_file: Path) -> dict[str, str]:
  208. function_index_to_name = {}
  209. cmd = f"{wasm_objdump} -x {wasm_file} --section=function"
  210. p = subprocess.run(
  211. shlex.split(cmd),
  212. check=True,
  213. capture_output=True,
  214. text=True,
  215. universal_newlines=True,
  216. )
  217. outputs = p.stdout.split(os.linesep)
  218. for line in outputs:
  219. if not f"func[" in line:
  220. continue
  221. PATTERN = r".*func\[([0-9]+)\].*<(.*)>"
  222. m = re.match(PATTERN, line)
  223. assert m is not None
  224. index = m.groups()[0]
  225. name = m.groups()[1]
  226. function_index_to_name[index] = name
  227. return function_index_to_name
  228. def demangle(cxxfilt: Path, function_name: str) -> str:
  229. cmd = f"{cxxfilt} -n {function_name}"
  230. p = subprocess.run(
  231. shlex.split(cmd),
  232. check=True,
  233. capture_output=True,
  234. text=True,
  235. universal_newlines=True,
  236. )
  237. return p.stdout.strip()
  238. def main():
  239. parser = argparse.ArgumentParser(description="addr2line for wasm")
  240. parser.add_argument("--wasi-sdk", type=Path, help="path to wasi-sdk")
  241. parser.add_argument("--wabt", type=Path, help="path to wabt")
  242. parser.add_argument("--wasm-file", type=Path, help="path to wasm file")
  243. parser.add_argument("call_stack_file", type=Path, help="path to a call stack file")
  244. parser.add_argument(
  245. "--no-addr",
  246. action="store_true",
  247. help="use call stack without addresses or from fast interpreter mode",
  248. )
  249. parser.add_argument("--emsdk", type=Path, help="path to emsdk")
  250. args = parser.parse_args()
  251. wasm_objdump = args.wabt.joinpath("bin/wasm-objdump")
  252. assert wasm_objdump.exists()
  253. llvm_dwarf_dump = args.wasi_sdk.joinpath("bin/llvm-dwarfdump")
  254. assert llvm_dwarf_dump.exists()
  255. llvm_cxxfilt = args.wasi_sdk.joinpath("bin/llvm-cxxfilt")
  256. assert llvm_cxxfilt.exists()
  257. emcc_production = locate_sourceMappingURL_section(wasm_objdump, args.wasm_file)
  258. if emcc_production:
  259. if args.emsdk is None:
  260. print("Please provide the path to emsdk via --emsdk")
  261. return -1
  262. emsymbolizer = args.emsdk.joinpath("upstream/emscripten/emsymbolizer")
  263. assert emsymbolizer.exists()
  264. code_section_start = get_code_section_start(wasm_objdump, args.wasm_file)
  265. if code_section_start == -1:
  266. return -1
  267. function_index_to_name = parse_module_functions(wasm_objdump, args.wasm_file)
  268. assert args.call_stack_file.exists()
  269. with open(args.call_stack_file, "rt", encoding="ascii") as f:
  270. for i, line in enumerate(f):
  271. line = line.strip()
  272. if not line:
  273. continue
  274. splitted = parse_call_stack_line(line)
  275. if splitted is None:
  276. print(f"{line}")
  277. continue
  278. _, offset, index = splitted
  279. if args.no_addr:
  280. # FIXME: w/ emcc production
  281. if not index.startswith("$f"): # E.g. _start or Text format
  282. print(f"{i}: {index}")
  283. continue
  284. index = index[2:]
  285. if index not in function_index_to_name:
  286. print(f"{i}: {line}")
  287. continue
  288. if not emcc_production:
  289. _, function_file, function_line = (
  290. get_line_info_from_function_name_dwarf(
  291. llvm_dwarf_dump,
  292. args.wasm_file,
  293. function_index_to_name[index],
  294. )
  295. )
  296. else:
  297. _, function_file, function_line = _, "unknown", "?"
  298. function_name = demangle(llvm_cxxfilt, function_index_to_name[index])
  299. print(f"{i}: {function_name}")
  300. print(f"\tat {function_file}:{function_line}")
  301. else:
  302. offset = int(offset, 16)
  303. # match the algorithm in wasm_interp_create_call_stack()
  304. # either a *offset* to *code* section start
  305. # or a *offset* in a file
  306. assert offset > code_section_start
  307. offset = offset - code_section_start
  308. if emcc_production:
  309. function_name, function_file, function_line, function_column = (
  310. get_line_info_from_function_addr_sourcemapping(
  311. emsymbolizer, args.wasm_file, offset
  312. )
  313. )
  314. else:
  315. function_name, function_file, function_line, function_column = (
  316. get_line_info_from_function_addr_dwarf(
  317. llvm_dwarf_dump, args.wasm_file, offset
  318. )
  319. )
  320. # if can't parse function_name, use name section or <index>
  321. if function_name == "<unknown>":
  322. if index.startswith("$f"):
  323. function_name = function_index_to_name.get(index[2:], index)
  324. else:
  325. function_name = index
  326. function_name = demangle(llvm_cxxfilt, function_name)
  327. print(f"{i}: {function_name}")
  328. print(f"\tat {function_file}:{function_line}:{function_column}")
  329. return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())