process_folded_data.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. #!/usr/bin/env python3
  2. #
  3. # Copyright (C) 2019 Intel Corporation. All rights reserved.
  4. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  5. #
  6. """
  7. It is used to process *out.folded* file generated by [FlameGraph](https://github.com/brendangregg/FlameGraph).
  8. - translate jitted function names, which are in a form like `aot_func#N` or `[module name]#aot_func#N`, into corresponding names in a name section in .wasm
  9. - divide the translated functions into different modules if the module name is specified in the symbol
  10. Usage:
  11. After
  12. ``` bash
  13. # collect profiling data in perf.data
  14. $ perf script -i perf.data > out.perf
  15. $ ./FlameGraph/stackcollapse-perf.pl out.perf > out.folded
  16. ```
  17. Use this script to translate the function names in out.folded
  18. ```
  19. $ python process_folded_data.py --wabt_home <wabt-installation> --wasm <.wasm> out.folded
  20. # out.folded -> out.translated (plus out.<module name>.translated for every wasm module)
  21. ```
  22. """
  23. import argparse
  24. import os
  25. from pathlib import Path
  26. import re
  27. import shlex
  28. import subprocess
  29. from typing import Dict, List
  30. # parse arguments like "foo=bar,fiz=biz" into a dictatory {foo:bar,fiz=biz}
  31. class ParseKVArgs(argparse.Action):
  32. def __call__(self, parser, namespace, values, option_string=None):
  33. setattr(namespace, self.dest, dict())
  34. for value in values.split(","):
  35. k, v = value.split("=")
  36. getattr(namespace, self.dest)[k] = v
  37. def calculate_import_function_count(
  38. wasm_objdump_bin: Path, module_names: Dict[str, Path]
  39. ) -> Dict[str, int]:
  40. """
  41. for every wasm file in <module_names>, calculate the number of functions in the import section.
  42. using "<wasm_objdump_bin> -j Import -x <wasm_file>"
  43. """
  44. assert wasm_objdump_bin.exists()
  45. import_function_counts = {}
  46. for module_name, wasm_path in module_names.items():
  47. assert wasm_path.exists()
  48. command = f"{wasm_objdump_bin} -j Import -x {wasm_path}"
  49. p = subprocess.run(
  50. shlex.split(command),
  51. capture_output=True,
  52. check=False,
  53. text=True,
  54. universal_newlines=True,
  55. )
  56. if p.stderr:
  57. print("No content in import section")
  58. import_function_counts[module_name] = 0
  59. continue
  60. import_function_count = 0
  61. for line in p.stdout.split(os.linesep):
  62. line = line.strip()
  63. if not line:
  64. continue
  65. if not " func" in line:
  66. continue
  67. m = re.search(r"^-\s+func", line)
  68. assert m
  69. import_function_count += 1
  70. # print(f"! there are {import_function_count} import function in {module_name}")
  71. import_function_counts[module_name] = import_function_count
  72. return import_function_counts
  73. def collect_name_section_content(
  74. wasm_objdump_bin: Path, module_names: Dict[str, Path]
  75. ) -> Dict[str, Dict[int, str]]:
  76. """
  77. for every wasm file in <module_names>, get the content of name section.
  78. execute "wasm_objdump_bin -j name -x wasm_file"
  79. """
  80. assert wasm_objdump_bin.exists()
  81. name_sections = {}
  82. for module_name, wasm_path in module_names.items():
  83. assert wasm_path.exists()
  84. command = f"{wasm_objdump_bin} -j name -x {wasm_path}"
  85. p = subprocess.run(
  86. shlex.split(command),
  87. capture_output=True,
  88. check=False,
  89. text=True,
  90. universal_newlines=True,
  91. )
  92. if p.stderr:
  93. print("No content in name section")
  94. name_sections[module_name] = {}
  95. continue
  96. name_section = {}
  97. for line in p.stdout.split(os.linesep):
  98. line = line.strip()
  99. if not line:
  100. continue
  101. if not " func" in line:
  102. continue
  103. # - func[N] <__imported_wasi_snapshot_preview1_fd_close>
  104. m = re.match(r"- func\[(\d+)\] <(.+)>", line)
  105. assert m
  106. func_index, func_name = m.groups()
  107. name_section.update({int(func_index): func_name})
  108. name_sections[module_name] = name_section
  109. return name_sections
  110. def is_stack_check_mode(folded: Path) -> bool:
  111. """
  112. check if there is a function name looks like "aot_func_internal#N", it means that WAMR adds a stack check function before the original function.
  113. """
  114. with open(folded, "rt", encoding="utf-8") as f:
  115. for line in f:
  116. line = line.strip()
  117. if "aot_func_internal" in line:
  118. return True
  119. return False
  120. def replace_function_name(
  121. import_function_counts: Dict[str, int],
  122. name_sections: Dict[str, Dict[int, str]],
  123. folded_in: Path,
  124. module_names: Dict[str, Path],
  125. ) -> None:
  126. """
  127. read content in <folded_in>. every line contains symbols which are separated by ";".
  128. Usually, all jitted functions are in the form of "aot_func#N". N is its function index. Use the index to find the corresponding function name in the name section.
  129. if there is a function name looks like "aot_func_internal#N", it means that WAMR adds a stack check function before the original function.
  130. In this case, "aot_func#N" should be translated with "_precheck" as a suffix and "aot_func_internal#N" should be treated as the original one
  131. """
  132. assert folded_in.exists(), f"{folded_in} doesn't exist"
  133. stack_check_mode = is_stack_check_mode(folded_in)
  134. # every wasm has a translated out.folded, like out.<module_name>.folded.translated
  135. folded_out_files = {}
  136. for module_name in module_names.keys():
  137. wasm_folded_out_path = folded_in.with_suffix(f".{module_name}.translated")
  138. print(f"-> write into {wasm_folded_out_path}")
  139. folded_out_files[module_name] = wasm_folded_out_path.open(
  140. "wt", encoding="utf-8"
  141. )
  142. # Plus a default translated out.folded
  143. default_folded_out_path = folded_in.with_suffix(".translated")
  144. print(f"-> write into {default_folded_out_path}")
  145. default_folded_out = default_folded_out_path.open("wt", encoding="utf-8")
  146. with folded_in.open("rt", encoding="utf-8") as f_in:
  147. for line in f_in:
  148. line = line.strip()
  149. m = re.match(r"(.*) (\d+)", line)
  150. assert m
  151. syms, samples = m.groups()
  152. new_line = []
  153. last_function_module_name = ""
  154. for sym in syms.split(";"):
  155. if not "aot_func" in sym:
  156. new_line.append(sym)
  157. continue
  158. # [module_name]#aot_func#N or aot_func#N
  159. splitted = sym.split("#")
  160. module_name = "" if splitted[0] == "aot_func" else splitted[0]
  161. # remove [ and ]
  162. module_name = module_name[1:-1]
  163. if len(module_name) == 0 and len(module_names) > 1:
  164. raise RuntimeError(
  165. f"❌ {sym} doesn't have a module name, but there are multiple wasm files"
  166. )
  167. if not module_name in module_names:
  168. raise RuntimeError(
  169. f"❌ can't find corresponds wasm file for {module_name}"
  170. )
  171. last_function_module_name = module_name
  172. func_idx = int(splitted[-1])
  173. # adjust index
  174. func_idx = func_idx + import_function_counts[module_name]
  175. # print(f"🔍 {module_name} {splitted[1]} {func_idx}")
  176. if func_idx in name_sections[module_name]:
  177. if len(module_name) > 0:
  178. wasm_func_name = f"[Wasm] [{module_name}] {name_sections[module_name][func_idx]}"
  179. else:
  180. wasm_func_name = (
  181. f"[Wasm] {name_sections[module_name][func_idx]}"
  182. )
  183. else:
  184. if len(module_name) > 0:
  185. wasm_func_name = f"[Wasm] [{module_name}] func[{func_idx}]"
  186. else:
  187. wasm_func_name = f"[Wasm] func[{func_idx}]"
  188. if stack_check_mode:
  189. # aot_func_internal -> xxx
  190. # aot_func --> xxx_precheck
  191. if "aot_func" == splitted[1]:
  192. wasm_func_name += "_precheck"
  193. new_line.append(wasm_func_name)
  194. line = ";".join(new_line)
  195. line += f" {samples}"
  196. # always write into the default output
  197. default_folded_out.write(line + os.linesep)
  198. # based on the module name of last function, write into the corresponding output
  199. if len(last_function_module_name) > 0:
  200. folded_out_files[last_function_module_name].write(line + os.linesep)
  201. default_folded_out.close()
  202. for f in folded_out_files.values():
  203. f.close()
  204. def main(wabt_home: str, folded: str, module_names: Dict[str, Path]) -> None:
  205. wabt_home = Path(wabt_home)
  206. assert wabt_home.exists()
  207. folded = Path(folded)
  208. assert folded.exists()
  209. wasm_objdump_bin = wabt_home.joinpath("bin", "wasm-objdump")
  210. import_function_counts = calculate_import_function_count(
  211. wasm_objdump_bin, module_names
  212. )
  213. name_sections = collect_name_section_content(wasm_objdump_bin, module_names)
  214. replace_function_name(import_function_counts, name_sections, folded, module_names)
  215. if __name__ == "__main__":
  216. argparse = argparse.ArgumentParser()
  217. argparse.add_argument(
  218. "--wabt_home", required=True, help="wabt home, like /opt/wabt-1.0.33"
  219. )
  220. argparse.add_argument(
  221. "--wasm",
  222. action="append",
  223. default=[],
  224. help="wasm files for profiling before. like --wasm apple.wasm --wasm banana.wasm",
  225. )
  226. argparse.add_argument(
  227. "--wasm_names",
  228. action=ParseKVArgs,
  229. default={},
  230. metavar="module_name=wasm_file, ...",
  231. help="multiple wasm files and their module names, like a=apple.wasm,b=banana.wasm,c=cake.wasm",
  232. )
  233. argparse.add_argument(
  234. "folded_file",
  235. help="a out.folded generated by flamegraph/stackcollapse-perf.pl",
  236. )
  237. args = argparse.parse_args()
  238. if not args.wasm and not args.wasm_names:
  239. print("Please specify wasm files with either --wasm or --wasm_names")
  240. exit(1)
  241. # - only one wasm file. And there is no [module name] in out.folded
  242. # - multiple wasm files. via `--wasm X --wasm Y --wasm Z`. And there is [module name] in out.folded. use the basename of wasm as the module name
  243. # - multiple wasm files. via `--wasm_names X=x,Y=y,Z=z`. And there is [module name] in out.folded. use the specified module name
  244. module_names = {}
  245. if args.wasm_names:
  246. for name, wasm_path in args.wasm_names.items():
  247. module_names[name] = Path(wasm_path)
  248. else:
  249. # use the basename of wasm as the module name
  250. for wasm in args.wasm:
  251. wasm_path = Path(wasm)
  252. module_names[wasm_path.stem] = wasm_path
  253. main(args.wabt_home, args.folded_file, module_names)