check_callgraph.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. #!/usr/bin/env python
  2. #
  3. # Based on cally.py (https://github.com/chaudron/cally/), Copyright 2018, Eelco Chaudron
  4. # SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD
  5. # SPDX-License-Identifier: Apache-2.0
  6. import argparse
  7. import os
  8. import re
  9. from functools import partial
  10. from typing import BinaryIO, Callable, Dict, Generator, List, Optional, Tuple
  11. import elftools
  12. from elftools.elf import elffile
  # Matches the ";; Function ..." header line that starts a function in an RTL dump.
  # Group "mangle" is the text before the parenthesis, group "function" the first
  # token inside it.
  FUNCTION_REGEX = re.compile(r'^;; Function (?P<mangle>.*)\s+\((?P<function>\S+)(,.*)?\).*$')

  # Matches a line containing a "(call" expression; group "target" is the quoted name.
  CALL_REGEX = re.compile(r'^.*\(call.*"(?P<target>.*)".*$')

  # Matches a line containing a "(symbol_ref" expression; group "target" is the quoted name.
  SYMBOL_REF_REGEX = re.compile(r'^.*\(symbol_ref[^()]*\("(?P<target>.*)"\).*$')
  class RtlFunction:
      """A function found in an RTL dump, with the names it calls and references."""

      def __init__(self, name: str, rtl_filename: str, tu_filename: str) -> None:
          self.name = name
          self.rtl_filename = rtl_filename
          self.tu_filename = tu_filename
          # Names of called functions and of referenced symbols; both start out empty
          # and are filled in while the RTL file is parsed.
          self.calls: List[str] = []
          self.refs: List[str] = []
          self.sym = None
  class SectionAddressRange:
      """Half-open address range [low, high) covered by a named section."""

      def __init__(self, name: str, addr: int, size: int) -> None:
          self.name = name
          self.low = addr
          self.high = addr + size

      def __str__(self) -> str:
          fields = (self.name, self.low, self.high)
          return '{}: 0x{:08x} - 0x{:08x}'.format(*fields)

      def contains_address(self, addr: int) -> bool:
          """Return True if addr lies inside the range; the end address is excluded."""
          return addr >= self.low and addr < self.high
  # Extra address ranges per chip target, keyed by target name. ElfInfo appends the
  # entry selected by the IDF_TARGET environment variable to the sections found in
  # the ELF file itself.
  TARGET_SECTIONS: Dict[str, List[SectionAddressRange]] = {
      'esp32': [
          SectionAddressRange(name='.rom.text', addr=0x40000000, size=0x70000),
          SectionAddressRange(name='.rom.rodata', addr=0x3ff96000, size=0x9018),
      ],
      'esp32s2': [
          SectionAddressRange(name='.rom.text', addr=0x40000000, size=0x1bed0),
          SectionAddressRange(name='.rom.rodata', addr=0x3ffac600, size=0x392c),
      ],
      'esp32s3': [
          SectionAddressRange(name='.rom.text', addr=0x40000000, size=0x568d0),
          SectionAddressRange(name='.rom.rodata', addr=0x3ff071c0, size=0x8e30),
      ],
  }
  class Symbol:
      """An ELF symbol together with the references recorded to and from it."""

      def __init__(self, name: str, addr: int, local: bool, filename: Optional[str], section: Optional[str]) -> None:
          self.name = name
          self.addr = addr
          self.local = local
          self.filename = filename
          self.section = section
          # Symbols this one references, and symbols that reference this one.
          self.refers_to: List[Symbol] = []
          self.referred_from: List[Symbol] = []

      def __str__(self) -> str:
          section_name = self.section if self.section else 'unknown'
          local_marker = ' (local)' if self.local else ''
          return '{} @0x{:08x} [{}]{} {}'.format(
              self.name, self.addr, section_name, local_marker, self.filename
          )
  class Reference:
      """A directed reference from one symbol (from_sym) to another (to_sym)."""

      def __init__(self, from_sym: Symbol, to_sym: Symbol) -> None:
          self.from_sym = from_sym
          self.to_sym = to_sym

      def __str__(self) -> str:
          src, dst = self.from_sym, self.to_sym
          return '{} @0x{:08x} ({}) -> {} @0x{:08x} ({})'.format(
              src.name, src.addr, src.section, dst.name, dst.addr, dst.section
          )
  class IgnorePair:
      """A "symbol/function_call" pair, as given to the --ignore-symbols option.

      Attributes:
          symbol: the part before the slash
          function_call: the part after the slash

      Raises:
          ValueError: if pair does not contain exactly one '/'.
      """

      def __init__(self, pair: str) -> None:
          parts = pair.split('/')
          if len(parts) != 2:
              # Fail with a message that names the offending pair instead of the
              # generic tuple-unpacking error.
              raise ValueError(
                  "Invalid ignore pair '{}': expected the form symbol/function_name".format(pair)
              )
          self.symbol, self.function_call = parts
  class ElfInfo:
      """Symbols and section address ranges read from an ELF file.

      Attributes:
          elf_file: the binary file object passed to the constructor
          elf_obj: the ELFFile parsed from elf_file
          section_ranges: address ranges of the sections contained in PT_LOAD segments,
              followed by the TARGET_SECTIONS entry for the target named by the
              IDF_TARGET environment variable, if there is one
          symbols: STT_NOTYPE, STT_FUNC and STT_OBJECT symbols from all symbol tables
      """

      def __init__(self, elf_file: BinaryIO) -> None:
          self.elf_file = elf_file
          self.elf_obj = elffile.ELFFile(self.elf_file)
          # Sections must be loaded first: _load_symbols() resolves the section of
          # every symbol through section_for_addr().
          self.section_ranges = self._load_sections()
          self.symbols = self._load_symbols()
          # Index built once so that symbols_by_name() does not have to scan the whole
          # symbol list on every lookup. Per-name lists keep the order of self.symbols.
          self._symbols_index: Dict[str, List[Symbol]] = {}
          for sym in self.symbols:
              self._symbols_index.setdefault(sym.name, []).append(sym)

      def _load_symbols(self) -> List[Symbol]:
          """Collect the NOTYPE/FUNC/OBJECT symbols of every symbol table section."""
          symbols = []
          for s in self.elf_obj.iter_sections():
              if not isinstance(s, elftools.elf.sections.SymbolTableSection):
                  continue
              # Name of the most recent STT_FILE entry; it is attached to the local
              # symbols that follow it in the table.
              filename = None
              for sym in s.iter_symbols():
                  sym_type = sym.entry['st_info']['type']
                  if sym_type == 'STT_FILE':
                      filename = sym.name
                  if sym_type in ('STT_NOTYPE', 'STT_FUNC', 'STT_OBJECT'):
                      local = sym.entry['st_info']['bind'] == 'STB_LOCAL'
                      addr = sym.entry['st_value']
                      symbols.append(
                          Symbol(
                              sym.name,
                              addr,
                              local,
                              filename if local else None,
                              self.section_for_addr(addr),
                          )
                      )
          return symbols

      def _load_sections(self) -> List[SectionAddressRange]:
          """Return address ranges of sections in PT_LOAD segments plus target-specific ones."""
          result = []
          for segment in self.elf_obj.iter_segments():
              if segment['p_type'] != 'PT_LOAD':
                  continue
              for section in self.elf_obj.iter_sections():
                  if not segment.section_in_segment(section):
                      continue
                  result.append(
                      SectionAddressRange(
                          section.name, section['sh_addr'], section['sh_size']
                      )
                  )

          target = os.environ.get('IDF_TARGET')
          if target in TARGET_SECTIONS:
              result += TARGET_SECTIONS[target]

          return result

      def symbols_by_name(self, name: str) -> List['Symbol']:
          """Return a new list of all loaded symbols called name (empty if none)."""
          return list(self._symbols_index.get(name, []))

      def section_for_addr(self, sym_addr: int) -> Optional[str]:
          """Return the name of the first known section containing sym_addr, or None."""
          for sar in self.section_ranges:
              if sar.contains_address(sym_addr):
                  return sar.name
          return None
  def load_rtl_file(rtl_filename: str, tu_filename: str, functions: List[RtlFunction], ignore_pairs: List[IgnorePair]) -> None:
      """
      Parse one RTL dump file and append an RtlFunction to `functions` for every
      function header found in it.

      Calls and symbol references on the lines after a header are recorded, without
      duplicates, in the `calls` and `refs` lists of that function. Lines before the
      first function header are ignored.

      `ignore_pairs` suppresses references: when a call to `pair.function_call` is
      seen, the most recently recorded reference whose name contains `pair.symbol`
      is removed from the current function's `refs`.
      """
      last_function: Optional[RtlFunction] = None
      # Use a context manager so that the file is closed even if parsing fails.
      with open(rtl_filename) as rtl_file:
          for line in rtl_file:
              # Find function definition
              match = FUNCTION_REGEX.match(line)
              if match:
                  last_function = RtlFunction(match.group('function'), rtl_filename, tu_filename)
                  functions.append(last_function)
                  continue

              if last_function is None:
                  # Nothing can be attributed to a function yet.
                  continue

              # Find direct function calls
              match = CALL_REGEX.match(line)
              if match:
                  target = match.group('target')
                  # If target matches one of the IgnorePair function_call attributes, remove
                  # the last occurrence of the associated symbol from the last_function.refs list.
                  for pair in ignore_pairs:
                      if pair.function_call != target:
                          continue
                      ignored_symbols = [ref for ref in last_function.refs if pair.symbol in ref]
                      if ignored_symbols:
                          last_ref = ignored_symbols[-1]
                          last_function.refs = [ref for ref in last_function.refs if ref != last_ref]
                  if target not in last_function.calls:
                      last_function.calls.append(target)
                  continue

              # Find symbol references
              match = SYMBOL_REF_REGEX.match(line)
              if match:
                  target = match.group('target')
                  if target not in last_function.refs:
                      last_function.refs.append(target)
  def rtl_filename_matches_sym_filename(rtl_filename: str, symbol_filename: str) -> bool:
      """
      Check whether an RTL dump file was produced from the source file `symbol_filename`.

      Returns True when the base name of `rtl_filename` is `symbol_filename` itself or
      `symbol_filename` followed by a dot-separated suffix.
      """
      # Symbol file names (from ELF debug info) are short source file names, without path: "cpu_start.c".
      # RTL file names are paths relative to the build directory, e.g.:
      # "build/esp-idf/esp_system/CMakeFiles/__idf_esp_system.dir/port/cpu_start.c.234r.expand"
      #
      # The check below may give a false positive if there are two files with the same name in
      # different directories. This doesn't seem to happen in IDF now, but if it does happen,
      # an assert in find_symbol_by_name should catch this.
      #
      # If this becomes an issue, consider also loading the .map file and using it to figure out
      # which object file was used as the source of each symbol. Names of the object files and RTL files
      # should be much easier to match.
      rtl_basename = os.path.basename(rtl_filename)
      # Require a '.' right after the source file name, so that "foo.c" does not match
      # the dump of a different source file such as "foo.cpp.234r.expand".
      return rtl_basename == symbol_filename or rtl_basename.startswith(symbol_filename + '.')
  class SymbolNotFound(RuntimeError):
      """Error type for a symbol that could not be found."""
  def find_symbol_by_name(name: str, elfinfo: ElfInfo, local_func_matcher: Callable[[Symbol], bool]) -> Optional[Symbol]:
      """
      Look up the ELF symbol called `name`.

      Returns None if there is no such symbol, and the symbol itself if there is exactly one.
      With several candidates, `local_func_matcher` is called on each local one to decide
      whether it is suitable; a suitable local symbol wins over the global one.
      """
      candidates = elfinfo.symbols_by_name(name)
      if not candidates:
          return None
      if len(candidates) == 1:
          return candidates[0]

      # Several symbols share this name. Find the best fit.
      best_local: Optional[Symbol] = None
      best_global: Optional[Symbol] = None
      for candidate in candidates:
          if candidate.local:
              if local_func_matcher(candidate):
                  assert not best_local  # can't have two symbols with the same name in a single file
                  best_local = candidate
          else:
              assert not best_global  # can't have two global symbols with the same name
              best_global = candidate

      # When both a local and a global symbol of this name exist,
      # the local one is preferred as the reference target.
      return best_local or best_global
  def match_local_source_func(rtl_filename: str, sym: Symbol) -> bool:
      """
      Helper for match_rtl_funcs_to_symbols: decide from the RTL file name whether the
      local symbol `sym` is a good candidate for the reference source (the caller).
      """
      sym_filename = sym.filename
      assert sym_filename  # should be set for local functions
      return rtl_filename_matches_sym_filename(rtl_filename, sym_filename)
  def match_local_target_func(rtl_filename: str, sym_from: Symbol, sym: Symbol) -> bool:
      """
      Helper for match_rtl_funcs_to_symbols: decide whether the local symbol `sym` is a
      good candidate for the reference target (callee or referenced data), given the
      source symbol `sym_from` and the RTL file name of the source.
      """
      assert sym.filename  # should be set for local functions
      if not sym_from.local:
          # A global symbol references a local one: the source file name is not known,
          # so the RTL file name serves as a hint.
          return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)
      # A local symbol references another local symbol: both must come from the same file.
      return sym_from.filename == sym.filename
  def match_rtl_funcs_to_symbols(rtl_functions: List[RtlFunction], elfinfo: ElfInfo) -> Tuple[List[Symbol], List[Reference]]:
      """
      Resolve RTL functions and the names they call/reference to ELF symbols.

      Returns a tuple (symbols, refs): `symbols` lists every symbol that took part in
      at least one lookup as a source or a target, in order of first appearance, without
      duplicates; `refs` holds one Reference per resolved (source, target) pair.
      As a side effect, the `refers_to` / `referred_from` lists of the symbols are extended.
      """
      # Insertion-ordered dict used as an ordered set: constant-time membership test
      # instead of scanning a list, with the same first-seen ordering.
      seen_symbols: Dict[Symbol, None] = {}
      refs: List[Reference] = []

      # General idea:
      # - iterate over RTL functions.
      #   - for each RTL function, find the corresponding symbol
      #   - iterate over the functions and variables referenced from this RTL function
      #     - find symbols corresponding to the references
      #     - record every pair (sym_from, sym_to) as a Reference object

      for source_rtl_func in rtl_functions:
          sym_from = find_symbol_by_name(
              source_rtl_func.name, elfinfo, partial(match_local_source_func, source_rtl_func.rtl_filename)
          )
          if sym_from is None:
              # RTL references a symbol, but the symbol is not defined in the generated object file.
              # This means that the symbol was likely removed (or not included) at link time.
              # There is nothing we can do to check section placement in this case.
              continue
          seen_symbols.setdefault(sym_from, None)

          for target_rtl_func_name in source_rtl_func.calls + source_rtl_func.refs:
              if '*.LC' in target_rtl_func_name:  # skip local labels
                  continue

              sym_to = find_symbol_by_name(
                  target_rtl_func_name,
                  elfinfo,
                  partial(match_local_target_func, source_rtl_func.rtl_filename, sym_from),
              )
              if not sym_to:
                  # This may happen for an extern reference in the RTL file, if the reference was later removed
                  # by one of the optimization passes, and the external definition got garbage-collected.
                  # TODO: consider adding some sanity check that we are here not because of some bug in
                  # find_symbol_by_name?..
                  continue

              sym_from.refers_to.append(sym_to)
              sym_to.referred_from.append(sym_from)
              refs.append(Reference(sym_from, sym_to))
              seen_symbols.setdefault(sym_to, None)

      return list(seen_symbols), refs
  def get_symbols_and_refs(rtl_list: List[str], elf_file: BinaryIO, ignore_pairs: List[IgnorePair]) -> Tuple[List[Symbol], List[Reference]]:
      """
      Load the ELF file and all RTL files in `rtl_list`, then match the RTL functions
      to ELF symbols. Returns the (symbols, references) pair produced by
      match_rtl_funcs_to_symbols.
      """
      elf_info = ElfInfo(elf_file)

      parsed_functions: List[RtlFunction] = []
      for rtl_path in rtl_list:
          # The RTL file path is passed as the translation unit name as well.
          load_rtl_file(rtl_path, rtl_path, parsed_functions, ignore_pairs)

      return match_rtl_funcs_to_symbols(parsed_functions, elf_info)
  def list_refs_from_to_sections(refs: List[Reference], from_sections: List[str], to_sections: List[str]) -> int:
      """
      Print every reference going from one of `from_sections` to one of `to_sections`
      and return how many were printed. An empty section list matches any section.
      """
      count = 0
      for ref in refs:
          if from_sections and ref.from_sym.section not in from_sections:
              continue
          if to_sections and ref.to_sym.section not in to_sections:
              continue
          print(ref)
          count += 1
      return count
  def find_files_recursive(root_path: str, ext: str) -> Generator[str, None, None]:
      """Yield the path of every file below root_path whose name ends with ext."""
      for current_dir, _subdirs, file_names in os.walk(root_path):
          yield from (
              os.path.join(current_dir, file_name)
              for file_name in file_names
              if file_name.endswith(ext)
          )
  def main() -> None:
      """
      Command line entry point.

      Builds the list of RTL files from --rtl-list or --rtl-dirs, loads them together
      with the ELF file, and then either prints all references ('all-refs') or only the
      references between the given sections ('find-refs').

      Raises:
          RuntimeError: if neither --rtl-list nor --rtl-dirs is given, or no RTL files are found.
          SystemExit: with code 1, if 'find-refs --exit-code' finds any references.
      """
      parser = argparse.ArgumentParser()

      parser.add_argument(
          '--rtl-list',
          help='File with the list of RTL files',
          type=argparse.FileType('r'),
      )
      parser.add_argument(
          '--rtl-dirs', help='comma-separated list of directories where to look for RTL files, recursively'
      )
      parser.add_argument(
          '--elf-file',
          required=True,
          help='Program ELF file',
          type=argparse.FileType('rb'),
      )

      action_sub = parser.add_subparsers(dest='action')
      find_refs_parser = action_sub.add_parser(
          'find-refs',
          help='List the references coming from a given list of source sections '
               'to a given list of target sections.',
      )
      find_refs_parser.add_argument(
          '--from-sections', help='comma-separated list of source sections'
      )
      find_refs_parser.add_argument(
          '--to-sections', help='comma-separated list of target sections'
      )
      find_refs_parser.add_argument(
          '--ignore-symbols',
          help='comma-separated list of symbol/function_name pairs. '
               'This will force the parser to ignore the symbol preceding the call to function_name',
      )
      find_refs_parser.add_argument(
          '--exit-code',
          action='store_true',
          help='If set, exits with non-zero code when any references found',
      )
      action_sub.add_parser(
          'all-refs',
          help='Print the list of all references',
      )

      args = parser.parse_args()

      if args.rtl_list:
          # argparse.FileType has already opened the file, so args.rtl_list is a file
          # object, not a path. Blank lines are skipped so they do not become empty file names.
          with args.rtl_list as rtl_list_file:
              rtl_list = [line.strip() for line in rtl_list_file if line.strip()]
      else:
          if not args.rtl_dirs:
              raise RuntimeError('Either --rtl-list or --rtl-dirs must be specified')
          rtl_list = []
          for rtl_dir in args.rtl_dirs.split(','):
              rtl_list.extend(find_files_recursive(rtl_dir, '.expand'))

      if not rtl_list:
          raise RuntimeError('No RTL files specified')

      # --ignore-symbols belongs to the find-refs sub-command only; for any other action
      # the attribute does not exist on the parsed namespace.
      ignore_symbols = getattr(args, 'ignore_symbols', None)
      ignore_pairs = [IgnorePair(pair) for pair in ignore_symbols.split(',')] if ignore_symbols else []

      # The ELF file is only needed while symbols are loaded; close it afterwards.
      with args.elf_file as elf_file:
          _, refs = get_symbols_and_refs(rtl_list, elf_file, ignore_pairs)

      if args.action == 'find-refs':
          from_sections = args.from_sections.split(',') if args.from_sections else []
          to_sections = args.to_sections.split(',') if args.to_sections else []
          found = list_refs_from_to_sections(refs, from_sections, to_sections)
          if args.exit_code and found:
              raise SystemExit(1)
      elif args.action == 'all-refs':
          for r in refs:
              print(str(r))


  if __name__ == '__main__':
      main()