| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413 |
- #!/usr/bin/env python3
- #
- # Copyright (C) 2019 Intel Corporation. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- #
- import argparse
- import os
- from pathlib import Path
- import re
- import shlex
- import subprocess
- import sys
- """
- This is a tool to convert addresses, which are from a call-stack dump generated by iwasm, into line info for a wasm file.
- When a wasm file is compiled with debug info, it is possible to transfer the address to line info.
- For example, there is a call-stack dump:
- ```
- #00: 0x0a04 - $f18
- #01: 0x08e4 - $f11
- #02: 0x096f - $f12
- #03: 0x01aa - _start
- ```
- - store the call-stack dump into a file, e.g. call_stack.txt
- - run the following command to convert the address into line info:
- ```
- $ cd test-tools/addr2line
- $ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt
- ```
- The script will use *wasm-objdump* in wabt to find the start of the code section and turn each address into an offset relative to it, then use *llvm-dwarfdump* to look up the line info for each address
- in the call-stack dump.
- - if addresses are not available in the stack trace (i.e. iwasm <= 1.3.2) or iwasm is used in fast interpreter mode,
- run the following command to convert the function index into line info (passing the `--no-addr` option):
- ```
- $ python3 addr2line.py --wasi-sdk <wasi-sdk installation> --wabt <wabt installation> --wasm-file <wasm file path> call_stack.txt --no-addr
- ```
- The script will use *wasm-objdump* in wabt to get the function names corresponding to function indexes, then use *llvm-dwarfdump* to look up the line info for each
- function index in the call-stack dump.
- """
def locate_sourceMappingURL_section(wasm_objdump: Path, wasm_file: Path) -> bool:
    """
    Figure out if the wasm file has a sourceMappingURL section.

    Runs ``<wasm_objdump> -h <wasm_file>`` and returns True as soon as one
    line of its output mentions ``sourceMappingURL``, otherwise False.

    Raises subprocess.CalledProcessError when wasm-objdump exits with a
    non-zero status.
    """
    # Hand the arguments over as a list: building one command string and
    # re-splitting it breaks as soon as a path contains a space or a quote.
    p = subprocess.run(
        [str(wasm_objdump), "-h", str(wasm_file)],
        check=True,
        capture_output=True,
        text=True,
    )

    # In text mode the line endings are already normalised, so splitting on
    # os.linesep would fail on Windows; splitlines() handles every variant.
    return any("sourceMappingURL" in line for line in p.stdout.splitlines())
def get_code_section_start(wasm_objdump: Path, wasm_file: Path) -> int:
    """
    Find the start offset of the Code section in a wasm file.

    If the code section header looks like:
        Code start=0x0000017c end=0x00004382 (size=0x00004206) count: 47
    the start offset is 0x0000017c.

    Returns the offset as an int, or -1 when the output of
    ``<wasm_objdump> -h <wasm_file>`` has no Code section header.

    Raises subprocess.CalledProcessError when wasm-objdump exits with a
    non-zero status.
    """
    # A list of arguments keeps paths with spaces intact.
    p = subprocess.run(
        [str(wasm_objdump), "-h", str(wasm_file)],
        check=True,
        capture_output=True,
        text=True,
    )

    # Anchor on the header shape instead of a bare `"Code" in line` test:
    # other lines (for instance one echoing a file name that contains "Code")
    # would otherwise be parsed as the header and blow up.
    header = re.compile(r"\s*Code\s+start=(0x[0-9a-fA-F]+)")
    for line in p.stdout.splitlines():
        m = header.match(line)
        if m:
            return int(m.group(1), 16)

    return -1
def get_line_info_from_function_addr_dwarf(
    dwarf_dump: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
    """
    Find the location info of a given offset in a wasm file.

    Runs ``<dwarf_dump> --lookup=<offset> <wasm_file>`` and scans its output.

    Returns (function_name, function_file, function_line, function_column).
    Fields that are not present in the output keep their placeholders:
    "<unknown>" for the name, "unknown" for the file and "?" for line/column.
    When a "Line info" line is present, line and column are the values
    produced by parse_line_info() (integers).

    The exit status of the tool is deliberately ignored (check=False); a
    failed lookup simply yields the placeholders.
    """
    # Use an argument list so that paths containing spaces survive.
    p = subprocess.run(
        [str(dwarf_dump), f"--lookup={offset}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    function_name, function_file = "<unknown>", "unknown"
    function_line, function_column = "?", "?"

    # splitlines() instead of split(os.linesep): text mode has already
    # normalised the line endings.
    for line in p.stdout.splitlines():
        line = line.strip()

        # The last occurrence of each attribute wins.
        if "DW_AT_name" in line:
            function_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            function_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "Line info" in line:
            _, function_line, function_column = parse_line_info(line)

    return (function_name, function_file, function_line, function_column)
def get_dwarf_tag_value(tag: str, line: str) -> str:
    """
    Extract the value of attribute `tag` from a line that starts with it.

    Recognised forms, tried in this order:
        <tag> ("some string")   -> some string
        <tag> (123)             -> 123
        <tag> (anything else)   -> anything else (returned verbatim, stripped)

    The value is always returned as a string.

    Raises ValueError when the line does not start with `tag` followed by a
    parenthesised value.
    """
    # Try extracting value as string
    STR_PATTERN = rf"{tag}\s+\(\"(.*)\"\)"
    m = re.match(STR_PATTERN, line)
    if m:
        return m.group(1)

    # Try extracting value as integer
    INT_PATTERN = rf"{tag}\s+\((\d+)\)"
    m = re.match(INT_PATTERN, line)
    if m:
        return m.group(1)

    # Anything else in parentheses (e.g. a hexadecimal value): hand back the
    # raw text instead of failing on `None.groups()`.
    RAW_PATTERN = rf"{tag}\s+\((.*)\)"
    m = re.match(RAW_PATTERN, line)
    if m:
        return m.group(1).strip()

    raise ValueError(f"can't extract the value of {tag} from {line!r}")
def get_line_info_from_function_name_dwarf(
    dwarf_dump: Path, wasm_file: Path, function_name: str
) -> tuple[str, str, str]:
    """
    Find the location info of a given function in a wasm file.

    Runs ``<dwarf_dump> --name=<function_name> <wasm_file>`` and scans its
    output.

    Returns (function_name, function_file, function_line). Fields that are
    not present in the output keep their placeholders: "<unknown>" for the
    name, "unknown" for the file and "?" for the line.

    The exit status of the tool is deliberately ignored (check=False); a
    failed lookup simply yields the placeholders.
    """
    # Use an argument list: neither the paths nor the function name may be
    # re-split on whitespace or quotes.
    p = subprocess.run(
        [str(dwarf_dump), f"--name={function_name}", str(wasm_file)],
        check=False,
        capture_output=True,
        text=True,
    )

    # Separate result variables so the `function_name` argument is not shadowed.
    found_name, found_file, found_line = "<unknown>", "unknown", "?"

    # splitlines() instead of split(os.linesep): text mode has already
    # normalised the line endings.
    for line in p.stdout.splitlines():
        line = line.strip()

        # The last occurrence of each attribute wins.
        if "DW_AT_name" in line:
            found_name = get_dwarf_tag_value("DW_AT_name", line)

        if "DW_AT_decl_file" in line:
            found_file = get_dwarf_tag_value("DW_AT_decl_file", line)

        if "DW_AT_decl_line" in line:
            found_line = get_dwarf_tag_value("DW_AT_decl_line", line)

    return (found_name, found_file, found_line)
def get_line_info_from_function_addr_sourcemapping(
    emsymbolizer: Path, wasm_file: Path, offset: int
) -> tuple[str, str, str, str]:
    """
    Find the location info of a given offset in a wasm file which is compiled with emcc.

    Runs ``<emsymbolizer> -t code -f <wasm_file>.map <wasm_file> <offset>``.
    There usually are two lines in its output:
        ??
        relative path to source file:line:column

    Returns (function_name, function_file, function_line, function_column),
    all as strings. Fields that are not present in the output keep their
    placeholders: "<unknown>" for the name, "unknown" for the file and "?"
    for line/column.

    The exit status of the tool is deliberately ignored (check=False).
    """
    # The source map is expected next to the wasm file, as <name>.map
    debug_info_source = wasm_file.with_name(f"{wasm_file.name}.map")

    # Use an argument list so that paths containing spaces survive.
    p = subprocess.run(
        [
            str(emsymbolizer),
            "-t",
            "code",
            "-f",
            str(debug_info_source),
            str(wasm_file),
            str(offset),
        ],
        check=False,
        capture_output=True,
        text=True,
    )

    function_name, function_file = "<unknown>", "unknown"
    function_line, function_column = "?", "?"

    # splitlines() instead of split(os.linesep): text mode has already
    # normalised the line endings.
    for line in p.stdout.splitlines():
        line = line.strip()
        if not line:
            continue

        m = re.match(r"(.*):(\d+):(\d+)", line)
        if m:
            function_file, function_line, function_column = m.groups()
        elif line != "??":
            # it's always ??, not sure about that
            function_name = line

    return (function_name, function_file, function_line, function_column)
def parse_line_info(line_info: str) -> tuple[str, int, int]:
    """
    line_info -> (file, line, column)

    Expects text containing
        Line info: file '<file>', line <line>, column <column>
    and returns the file as a string and line/column as integers.

    Raises ValueError when `line_info` does not contain that pattern.
    """
    PATTERN = r"Line info: file \'(.+)\', line ([0-9]+), column ([0-9]+)"
    m = re.search(PATTERN, line_info)
    # An explicit check instead of `assert`, which disappears under `python -O`
    if m is None:
        raise ValueError(f"unrecognized line info: {line_info!r}")

    file, line, column = m.groups()
    return (file, int(line), int(column))
def parse_call_stack_line(line: str) -> tuple[str, str, str]:
    """
    Split one frame of a call-stack dump into (frame number, offset, symbol).

    Frames with an address (WAMR > 1.3.2), including frames that carry a
    name from the name section and the `_start` frame:
        #00: 0x0a04 - $f18      => ("00", "0a04", "$f18")
        #02: 0x0200 - a         => ("02", "0200", "a")
        #05: 0x011f - _start    => ("05", "011f", "_start")
    The offset is returned as its hex digits, without the "0x" prefix.

    Frames without an address (old format):
        #00 $f18                => ("00", None, "$f18")

    Anything else yields None.
    """
    with_address = re.match(r"#([0-9]+): 0x([0-9a-f]+) - (\S+)", line)
    if with_address:
        return with_address.groups()

    without_address = re.match(r"#([0-9]+) (\S+)", line)
    if without_address is None:
        return None

    frame, symbol = without_address.groups()
    return (frame, None, symbol)
def parse_module_functions(wasm_objdump: Path, wasm_file: Path) -> dict[str, str]:
    """
    Build a map from function index to function name for a wasm file.

    Runs ``<wasm_objdump> -x <wasm_file> --section=function`` and picks up
    every line of the form
        ... func[<index>] ... <<name>>
    Both the index and the name are kept as strings.

    Lines that mention ``func[`` but carry no ``<name>`` are skipped, so such
    functions are simply absent from the result.

    Raises subprocess.CalledProcessError when wasm-objdump exits with a
    non-zero status.
    """
    # Use an argument list so that paths containing spaces survive.
    p = subprocess.run(
        [str(wasm_objdump), "-x", str(wasm_file), "--section=function"],
        check=True,
        capture_output=True,
        text=True,
    )

    entry = re.compile(r".*func\[([0-9]+)\].*<(.*)>")
    function_index_to_name = {}

    # splitlines() instead of split(os.linesep): text mode has already
    # normalised the line endings.
    for line in p.stdout.splitlines():
        m = entry.match(line)
        if m is None:
            # Not a function entry, or an entry without a name; the original
            # assertion would have aborted the whole run on the latter.
            continue

        index, name = m.groups()
        function_index_to_name[index] = name

    return function_index_to_name
def demangle(cxxfilt: Path, function_name: str) -> str:
    """
    Demangle a function name with ``<cxxfilt> -n <function_name>``.

    Returns the tool's standard output with surrounding whitespace removed.

    Raises subprocess.CalledProcessError when the tool exits with a non-zero
    status.
    """
    # Pass the name as a single argument: names that contain spaces, quotes
    # or parentheses must not be split into several arguments.
    p = subprocess.run(
        [str(cxxfilt), "-n", function_name],
        check=True,
        capture_output=True,
        text=True,
    )

    return p.stdout.strip()
def main():
    """
    Command line entry point.

    Reads a call-stack dump, resolves every frame to a function name and a
    source location, and prints the result. Frames that can't be parsed or
    resolved are printed as they are.

    Returns 0 on success and -1 when a required tool or file is missing or
    the wasm file has no Code section.
    """
    parser = argparse.ArgumentParser(description="addr2line for wasm")
    # --wasi-sdk, --wabt and --wasm-file are used unconditionally below, so
    # let argparse report a usage error instead of failing later on None.
    parser.add_argument(
        "--wasi-sdk", type=Path, required=True, help="path to wasi-sdk"
    )
    parser.add_argument("--wabt", type=Path, required=True, help="path to wabt")
    parser.add_argument(
        "--wasm-file", type=Path, required=True, help="path to wasm file"
    )
    parser.add_argument("call_stack_file", type=Path, help="path to a call stack file")
    parser.add_argument(
        "--no-addr",
        action="store_true",
        help="use call stack without addresses or from fast interpreter mode",
    )
    parser.add_argument("--emsdk", type=Path, help="path to emsdk")
    args = parser.parse_args()

    wasm_objdump = args.wabt.joinpath("bin/wasm-objdump")
    llvm_dwarf_dump = args.wasi_sdk.joinpath("bin/llvm-dwarfdump")
    llvm_cxxfilt = args.wasi_sdk.joinpath("bin/llvm-cxxfilt")

    # Explicit checks instead of `assert`, which disappears under `python -O`
    for tool in (wasm_objdump, llvm_dwarf_dump, llvm_cxxfilt):
        if not tool.exists():
            print(f"Can't find {tool}", file=sys.stderr)
            return -1

    # emsymbolizer is only needed for wasm files carrying a sourceMappingURL
    # section
    emsymbolizer = None
    emcc_production = locate_sourceMappingURL_section(wasm_objdump, args.wasm_file)
    if emcc_production:
        if args.emsdk is None:
            print("Please provide the path to emsdk via --emsdk")
            return -1

        emsymbolizer = args.emsdk.joinpath("upstream/emscripten/emsymbolizer")
        if not emsymbolizer.exists():
            print(f"Can't find {emsymbolizer}", file=sys.stderr)
            return -1

    code_section_start = get_code_section_start(wasm_objdump, args.wasm_file)
    if code_section_start == -1:
        return -1

    function_index_to_name = parse_module_functions(wasm_objdump, args.wasm_file)

    if not args.call_stack_file.exists():
        print(f"Can't find {args.call_stack_file}", file=sys.stderr)
        return -1

    # utf-8 is a superset of ascii and also accepts non-ascii symbol names
    with open(args.call_stack_file, "rt", encoding="utf-8") as f:
        for i, line in enumerate(f):
            line = line.strip()

            if not line:
                continue

            splitted = parse_call_stack_line(line)
            if splitted is None:
                # not a frame, echo it unchanged
                print(f"{line}")
                continue

            _, offset, index = splitted
            if args.no_addr:
                # FIXME: w/ emcc production
                if not index.startswith("$f"):  # E.g. _start or Text format
                    print(f"{i}: {index}")
                    continue
                index = index[2:]

                if index not in function_index_to_name:
                    print(f"{i}: {line}")
                    continue

                if emcc_production:
                    # no lookup by name is implemented for source maps
                    function_file, function_line = "unknown", "?"
                else:
                    _, function_file, function_line = (
                        get_line_info_from_function_name_dwarf(
                            llvm_dwarf_dump,
                            args.wasm_file,
                            function_index_to_name[index],
                        )
                    )

                function_name = demangle(llvm_cxxfilt, function_index_to_name[index])
                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}")
            else:
                if offset is None:
                    # Old-format frame without an address: there is nothing
                    # to look up unless --no-addr is given.
                    print(
                        f"frame without address, try --no-addr: {line}",
                        file=sys.stderr,
                    )
                    print(f"{i}: {line}")
                    continue

                offset = int(offset, 16)
                # match the algorithm in wasm_interp_create_call_stack()
                # either a *offset* to *code* section start
                # or a *offset* in a file
                if offset <= code_section_start:
                    # Not an offset inside the code section; keep the frame
                    # as it is instead of aborting the whole run.
                    print(
                        f"address is not inside the code section: {line}",
                        file=sys.stderr,
                    )
                    print(f"{i}: {line}")
                    continue
                offset = offset - code_section_start

                if emcc_production:
                    function_name, function_file, function_line, function_column = (
                        get_line_info_from_function_addr_sourcemapping(
                            emsymbolizer, args.wasm_file, offset
                        )
                    )
                else:
                    function_name, function_file, function_line, function_column = (
                        get_line_info_from_function_addr_dwarf(
                            llvm_dwarf_dump, args.wasm_file, offset
                        )
                    )

                # if can't parse function_name, use name section or <index>
                if function_name == "<unknown>":
                    if index.startswith("$f"):
                        function_name = function_index_to_name.get(index[2:], index)
                    else:
                        function_name = index

                function_name = demangle(llvm_cxxfilt, function_name)

                print(f"{i}: {function_name}")
                print(f"\tat {function_file}:{function_line}:{function_column}")

    return 0
- if __name__ == "__main__":
- sys.exit(main())
|