check_callgraph.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. #!/usr/bin/env python
  2. #
  3. # Based on cally.py (https://github.com/chaudron/cally/), Copyright 2018, Eelco Chaudron
  4. # SPDX-FileCopyrightText: 2020-2022 Espressif Systems (Shanghai) CO LTD
  5. # SPDX-License-Identifier: Apache-2.0
  6. import argparse
  7. import os
  8. import re
  9. from functools import partial
  10. import elftools
  11. from elftools.elf import elffile
  12. try:
  13. from typing import BinaryIO, Callable, Dict, Generator, List, Optional, Tuple
  14. except ImportError:
  15. pass
  16. FUNCTION_REGEX = re.compile(
  17. r'^;; Function (?P<mangle>.*)\s+\((?P<function>\S+)(,.*)?\).*$'
  18. )
  19. CALL_REGEX = re.compile(r'^.*\(call.*"(?P<target>.*)".*$')
  20. SYMBOL_REF_REGEX = re.compile(r'^.*\(symbol_ref[^()]*\("(?P<target>.*)"\).*$')
  21. class RtlFunction(object):
  22. def __init__(self, name, rtl_filename, tu_filename):
  23. self.name = name
  24. self.rtl_filename = rtl_filename
  25. self.tu_filename = tu_filename
  26. self.calls = list() # type: List[str]
  27. self.refs = list() # type: List[str]
  28. self.sym = None
  29. class SectionAddressRange(object):
  30. def __init__(self, name, addr, size): # type: (str, int, int) -> None
  31. self.name = name
  32. self.low = addr
  33. self.high = addr + size
  34. def __str__(self):
  35. return '{}: 0x{:08x} - 0x{:08x}'.format(self.name, self.low, self.high)
  36. def contains_address(self, addr):
  37. return self.low <= addr < self.high
  38. TARGET_SECTIONS = {
  39. 'esp32': [
  40. SectionAddressRange('.rom.text', 0x40000000, 0x70000),
  41. SectionAddressRange('.rom.rodata', 0x3ff96000, 0x9018)
  42. ],
  43. 'esp32s2': [
  44. SectionAddressRange('.rom.text', 0x40000000, 0x1bed0),
  45. SectionAddressRange('.rom.rodata', 0x3ffac600, 0x392c)
  46. ],
  47. 'esp32s3': [
  48. SectionAddressRange('.rom.text', 0x40000000, 0x568d0),
  49. SectionAddressRange('.rom.rodata', 0x3ff071c0, 0x8e30)
  50. ]
  51. } # type: Dict[str, List[SectionAddressRange]]
  52. class Symbol(object):
  53. def __init__(self, name, addr, local, filename, section): # type: (str, int, bool, Optional[str], Optional[str]) -> None
  54. self.name = name
  55. self.addr = addr
  56. self.local = local
  57. self.filename = filename
  58. self.section = section
  59. self.refers_to = list() # type: List[Symbol]
  60. self.referred_from = list() # type: List[Symbol]
  61. def __str__(self):
  62. return '{} @0x{:08x} [{}]{} {}'.format(
  63. self.name,
  64. self.addr,
  65. self.section or 'unknown',
  66. ' (local)' if self.local else '',
  67. self.filename
  68. )
  69. class Reference(object):
  70. def __init__(self, from_sym, to_sym): # type: (Symbol, Symbol) -> None
  71. self.from_sym = from_sym
  72. self.to_sym = to_sym
  73. def __str__(self):
  74. return '{} @0x{:08x} ({}) -> {} @0x{:08x} ({})'.format(
  75. self.from_sym.name,
  76. self.from_sym.addr,
  77. self.from_sym.section,
  78. self.to_sym.name,
  79. self.to_sym.addr,
  80. self.to_sym.section
  81. )
  82. class ElfInfo(object):
  83. def __init__(self, elf_file): # type: (BinaryIO) -> None
  84. self.elf_file = elf_file
  85. self.elf_obj = elffile.ELFFile(self.elf_file)
  86. self.section_ranges = self._load_sections()
  87. self.symbols = self._load_symbols()
  88. def _load_symbols(self): # type: () -> List[Symbol]
  89. symbols = []
  90. for s in self.elf_obj.iter_sections():
  91. if not isinstance(s, elftools.elf.sections.SymbolTableSection):
  92. continue
  93. filename = None
  94. for sym in s.iter_symbols():
  95. sym_type = sym.entry['st_info']['type']
  96. if sym_type == 'STT_FILE':
  97. filename = sym.name
  98. if sym_type in ['STT_NOTYPE', 'STT_FUNC', 'STT_OBJECT']:
  99. local = sym.entry['st_info']['bind'] == 'STB_LOCAL'
  100. addr = sym.entry['st_value']
  101. symbols.append(
  102. Symbol(
  103. sym.name,
  104. addr,
  105. local,
  106. filename if local else None,
  107. self.section_for_addr(addr),
  108. )
  109. )
  110. return symbols
  111. def _load_sections(self): # type: () -> List[SectionAddressRange]
  112. result = []
  113. for segment in self.elf_obj.iter_segments():
  114. if segment['p_type'] == 'PT_LOAD':
  115. for section in self.elf_obj.iter_sections():
  116. if not segment.section_in_segment(section):
  117. continue
  118. result.append(
  119. SectionAddressRange(
  120. section.name, section['sh_addr'], section['sh_size']
  121. )
  122. )
  123. target = os.environ.get('IDF_TARGET')
  124. if target in TARGET_SECTIONS:
  125. result += TARGET_SECTIONS[target]
  126. return result
  127. def symbols_by_name(self, name): # type: (str) -> List[Symbol]
  128. res = []
  129. for sym in self.symbols:
  130. if sym.name == name:
  131. res.append(sym)
  132. return res
  133. def section_for_addr(self, sym_addr): # type: (int) -> Optional[str]
  134. for sar in self.section_ranges:
  135. if sar.contains_address(sym_addr):
  136. return sar.name
  137. return None
  138. def load_rtl_file(rtl_filename, tu_filename, functions): # type: (str, str, List[RtlFunction]) -> None
  139. last_function = None # type: Optional[RtlFunction]
  140. for line in open(rtl_filename):
  141. # Find function definition
  142. match = re.match(FUNCTION_REGEX, line)
  143. if match:
  144. function_name = match.group('function')
  145. last_function = RtlFunction(function_name, rtl_filename, tu_filename)
  146. functions.append(last_function)
  147. continue
  148. if last_function:
  149. # Find direct function calls
  150. match = re.match(CALL_REGEX, line)
  151. if match:
  152. target = match.group('target')
  153. if target not in last_function.calls:
  154. last_function.calls.append(target)
  155. continue
  156. # Find symbol references
  157. match = re.match(SYMBOL_REF_REGEX, line)
  158. if match:
  159. target = match.group('target')
  160. if target not in last_function.refs:
  161. last_function.refs.append(target)
  162. continue
  163. def rtl_filename_matches_sym_filename(rtl_filename, symbol_filename): # type: (str, str) -> bool
  164. # Symbol file names (from ELF debug info) are short source file names, without path: "cpu_start.c".
  165. # RTL file names are paths relative to the build directory, e.g.:
  166. # "build/esp-idf/esp_system/CMakeFiles/__idf_esp_system.dir/port/cpu_start.c.234r.expand"
  167. #
  168. # The check below may give a false positive if there are two files with the same name in
  169. # different directories. This doesn't seem to happen in IDF now, but if it does happen,
  170. # an assert in find_symbol_by_rtl_func should catch this.
  171. #
  172. # If this becomes and issue, consider also loading the .map file and using it to figure out
  173. # which object file was used as the source of each symbol. Names of the object files and RTL files
  174. # should be much easier to match.
  175. return os.path.basename(rtl_filename).startswith(symbol_filename)
  176. class SymbolNotFound(RuntimeError):
  177. pass
  178. def find_symbol_by_name(name, elfinfo, local_func_matcher): # type: (str, ElfInfo, Callable[[Symbol], bool]) -> Optional[Symbol]
  179. """
  180. Find an ELF symbol for the given name.
  181. local_func_matcher is a callback function which checks is the candidate local symbol is suitable.
  182. """
  183. syms = elfinfo.symbols_by_name(name)
  184. if not syms:
  185. return None
  186. if len(syms) == 1:
  187. return syms[0]
  188. else:
  189. # There are multiple symbols with a given name. Find the best fit.
  190. local_candidate = None
  191. global_candidate = None
  192. for sym in syms:
  193. if not sym.local:
  194. assert not global_candidate # can't have two global symbols with the same name
  195. global_candidate = sym
  196. elif local_func_matcher(sym):
  197. assert not local_candidate # can't have two symbols with the same name in a single file
  198. local_candidate = sym
  199. # If two symbols with the same name are defined, a global and a local one,
  200. # prefer the local symbol as the reference target.
  201. return local_candidate or global_candidate
  202. def match_local_source_func(rtl_filename, sym): # type: (str, Symbol) -> bool
  203. """
  204. Helper for match_rtl_funcs_to_symbols, checks if local symbol sym is a good candidate for the
  205. reference source (caller), based on the RTL file name.
  206. """
  207. assert sym.filename # should be set for local functions
  208. return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)
  209. def match_local_target_func(rtl_filename, sym_from, sym): # type: (str, Symbol, Symbol) -> bool
  210. """
  211. Helper for match_rtl_funcs_to_symbols, checks if local symbol sym is a good candidate for the
  212. reference target (callee or referenced data), based on RTL filename of the source symbol
  213. and the source symbol itself.
  214. """
  215. assert sym.filename # should be set for local functions
  216. if sym_from.local:
  217. # local symbol referencing another local symbol
  218. return sym_from.filename == sym.filename
  219. else:
  220. # global symbol referencing a local symbol;
  221. # source filename is not known, use RTL filename as a hint
  222. return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)
  223. def match_rtl_funcs_to_symbols(rtl_functions, elfinfo): # type: (List[RtlFunction], ElfInfo) -> Tuple[List[Symbol], List[Reference]]
  224. symbols = [] # type: List[Symbol]
  225. refs = [] # type: List[Reference]
  226. # General idea:
  227. # - iterate over RTL functions.
  228. # - for each RTL function, find the corresponding symbol
  229. # - iterate over the functions and variables referenced from this RTL function
  230. # - find symbols corresponding to the references
  231. # - record every pair (sym_from, sym_to) as a Reference object
  232. for source_rtl_func in rtl_functions:
  233. maybe_sym_from = find_symbol_by_name(source_rtl_func.name, elfinfo, partial(match_local_source_func, source_rtl_func.rtl_filename))
  234. if maybe_sym_from is None:
  235. # RTL references a symbol, but the symbol is not defined in the generated object file.
  236. # This means that the symbol was likely removed (or not included) at link time.
  237. # There is nothing we can do to check section placement in this case.
  238. continue
  239. sym_from = maybe_sym_from
  240. if sym_from not in symbols:
  241. symbols.append(sym_from)
  242. for target_rtl_func_name in source_rtl_func.calls + source_rtl_func.refs:
  243. if '*.LC' in target_rtl_func_name: # skip local labels
  244. continue
  245. maybe_sym_to = find_symbol_by_name(target_rtl_func_name, elfinfo, partial(match_local_target_func, source_rtl_func.rtl_filename, sym_from))
  246. if not maybe_sym_to:
  247. # This may happen for a extern reference in the RTL file, if the reference was later removed
  248. # by one of the optimization passes, and the external definition got garbage-collected.
  249. # TODO: consider adding some sanity check that we are here not because of some bug in
  250. # find_symbol_by_name?..
  251. continue
  252. sym_to = maybe_sym_to
  253. sym_from.refers_to.append(sym_to)
  254. sym_to.referred_from.append(sym_from)
  255. refs.append(Reference(sym_from, sym_to))
  256. if sym_to not in symbols:
  257. symbols.append(sym_to)
  258. return symbols, refs
  259. def get_symbols_and_refs(rtl_list, elf_file): # type: (List[str], BinaryIO) -> Tuple[List[Symbol], List[Reference]]
  260. elfinfo = ElfInfo(elf_file)
  261. rtl_functions = [] # type: List[RtlFunction]
  262. for file_name in rtl_list:
  263. load_rtl_file(file_name, file_name, rtl_functions)
  264. return match_rtl_funcs_to_symbols(rtl_functions, elfinfo)
  265. def list_refs_from_to_sections(refs, from_sections, to_sections): # type: (List[Reference], List[str], List[str]) -> int
  266. found = 0
  267. for ref in refs:
  268. if (not from_sections or ref.from_sym.section in from_sections) and \
  269. (not to_sections or ref.to_sym.section in to_sections):
  270. print(str(ref))
  271. found += 1
  272. return found
  273. def find_files_recursive(root_path, ext): # type: (str, str) -> Generator[str, None, None]
  274. for root, _, files in os.walk(root_path):
  275. for basename in files:
  276. if basename.endswith(ext):
  277. filename = os.path.join(root, basename)
  278. yield filename
  279. def main():
  280. parser = argparse.ArgumentParser()
  281. parser.add_argument(
  282. '--rtl-list',
  283. help='File with the list of RTL files',
  284. type=argparse.FileType('r'),
  285. )
  286. parser.add_argument(
  287. '--rtl-dir', help='Directory where to look for RTL files, recursively'
  288. )
  289. parser.add_argument(
  290. '--elf-file',
  291. required=True,
  292. help='Program ELF file',
  293. type=argparse.FileType('rb'),
  294. )
  295. action_sub = parser.add_subparsers(dest='action')
  296. find_refs_parser = action_sub.add_parser(
  297. 'find-refs',
  298. help='List the references coming from a given list of source sections'
  299. 'to a given list of target sections.',
  300. )
  301. find_refs_parser.add_argument(
  302. '--from-sections', help='comma-separated list of source sections'
  303. )
  304. find_refs_parser.add_argument(
  305. '--to-sections', help='comma-separated list of target sections'
  306. )
  307. find_refs_parser.add_argument(
  308. '--exit-code',
  309. action='store_true',
  310. help='If set, exits with non-zero code when any references found',
  311. )
  312. action_sub.add_parser(
  313. 'all-refs',
  314. help='Print the list of all references',
  315. )
  316. parser.parse_args()
  317. args = parser.parse_args()
  318. if args.rtl_list:
  319. with open(args.rtl_list, 'r') as rtl_list_file:
  320. rtl_list = [line.strip for line in rtl_list_file]
  321. else:
  322. if not args.rtl_dir:
  323. raise RuntimeError('Either --rtl-list or --rtl-dir must be specified')
  324. rtl_list = list(find_files_recursive(args.rtl_dir, '.expand'))
  325. if not rtl_list:
  326. raise RuntimeError('No RTL files specified')
  327. _, refs = get_symbols_and_refs(rtl_list, args.elf_file)
  328. if args.action == 'find-refs':
  329. from_sections = args.from_sections.split(',') if args.from_sections else []
  330. to_sections = args.to_sections.split(',') if args.to_sections else []
  331. found = list_refs_from_to_sections(
  332. refs, from_sections, to_sections
  333. )
  334. if args.exit_code and found:
  335. raise SystemExit(1)
  336. elif args.action == 'all-refs':
  337. for r in refs:
  338. print(str(r))
  339. if __name__ == '__main__':
  340. main()