check_callgraph.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. #!/usr/bin/env python
  2. #
  3. # Based on cally.py (https://github.com/chaudron/cally/), Copyright 2018, Eelco Chaudron
  4. # Copyright 2020 Espressif Systems (Shanghai) PTE LTD
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. import argparse
  18. import os
  19. import re
  20. from functools import partial
  21. import elftools
  22. from elftools.elf import elffile
  23. try:
  24. from typing import BinaryIO, Callable, Dict, Generator, List, Optional, Tuple
  25. except ImportError:
  26. pass
  27. FUNCTION_REGEX = re.compile(
  28. r'^;; Function (?P<mangle>.*)\s+\((?P<function>\S+)(,.*)?\).*$'
  29. )
  30. CALL_REGEX = re.compile(r'^.*\(call.*"(?P<target>.*)".*$')
  31. SYMBOL_REF_REGEX = re.compile(r'^.*\(symbol_ref[^()]*\("(?P<target>.*)"\).*$')
  32. class RtlFunction(object):
  33. def __init__(self, name, rtl_filename, tu_filename):
  34. self.name = name
  35. self.rtl_filename = rtl_filename
  36. self.tu_filename = tu_filename
  37. self.calls = list() # type: List[str]
  38. self.refs = list() # type: List[str]
  39. self.sym = None
  40. class SectionAddressRange(object):
  41. def __init__(self, name, addr, size): # type: (str, int, int) -> None
  42. self.name = name
  43. self.low = addr
  44. self.high = addr + size
  45. def __str__(self):
  46. return '{}: 0x{:08x} - 0x{:08x}'.format(self.name, self.low, self.high)
  47. def contains_address(self, addr):
  48. return self.low <= addr < self.high
# Extra address ranges, keyed by the value of the IDF_TARGET environment
# variable. ElfInfo._load_sections() appends the matching list to the ranges
# read from the ELF file, so addresses falling inside them resolve to the
# section names given here.
# NOTE(review): judging by the names these are the chips' ROM code and
# read-only data regions -- confirm addresses and sizes against the memory maps.
TARGET_SECTIONS = {
    'esp32': [
        SectionAddressRange('.rom.text', 0x40000000, 0x70000),
        SectionAddressRange('.rom.rodata', 0x3ff96000, 0x9018)
    ],
    'esp32s2': [
        SectionAddressRange('.rom.text', 0x40000000, 0x1bed0),
        SectionAddressRange('.rom.rodata', 0x3ffac600, 0x392c)
    ],
    'esp32s3': [
        SectionAddressRange('.rom.text', 0x40000000, 0x568d0),
        SectionAddressRange('.rom.rodata', 0x3ff071c0, 0x8e30)
    ]
}  # type: Dict[str, List[SectionAddressRange]]
  63. class Symbol(object):
  64. def __init__(self, name, addr, local, filename, section): # type: (str, int, bool, Optional[str], Optional[str]) -> None
  65. self.name = name
  66. self.addr = addr
  67. self.local = local
  68. self.filename = filename
  69. self.section = section
  70. self.refers_to = list() # type: List[Symbol]
  71. self.referred_from = list() # type: List[Symbol]
  72. def __str__(self):
  73. return '{} @0x{:08x} [{}]{} {}'.format(
  74. self.name,
  75. self.addr,
  76. self.section or 'unknown',
  77. ' (local)' if self.local else '',
  78. self.filename
  79. )
  80. class Reference(object):
  81. def __init__(self, from_sym, to_sym): # type: (Symbol, Symbol) -> None
  82. self.from_sym = from_sym
  83. self.to_sym = to_sym
  84. def __str__(self):
  85. return '{} @0x{:08x} ({}) -> {} @0x{:08x} ({})'.format(
  86. self.from_sym.name,
  87. self.from_sym.addr,
  88. self.from_sym.section,
  89. self.to_sym.name,
  90. self.to_sym.addr,
  91. self.to_sym.section
  92. )
  93. class ElfInfo(object):
  94. def __init__(self, elf_file): # type: (BinaryIO) -> None
  95. self.elf_file = elf_file
  96. self.elf_obj = elffile.ELFFile(self.elf_file)
  97. self.section_ranges = self._load_sections()
  98. self.symbols = self._load_symbols()
  99. def _load_symbols(self): # type: () -> List[Symbol]
  100. symbols = []
  101. for s in self.elf_obj.iter_sections():
  102. if not isinstance(s, elftools.elf.sections.SymbolTableSection):
  103. continue
  104. filename = None
  105. for sym in s.iter_symbols():
  106. sym_type = sym.entry['st_info']['type']
  107. if sym_type == 'STT_FILE':
  108. filename = sym.name
  109. if sym_type in ['STT_NOTYPE', 'STT_FUNC', 'STT_OBJECT']:
  110. local = sym.entry['st_info']['bind'] == 'STB_LOCAL'
  111. addr = sym.entry['st_value']
  112. symbols.append(
  113. Symbol(
  114. sym.name,
  115. addr,
  116. local,
  117. filename if local else None,
  118. self.section_for_addr(addr),
  119. )
  120. )
  121. return symbols
  122. def _load_sections(self): # type: () -> List[SectionAddressRange]
  123. result = []
  124. for segment in self.elf_obj.iter_segments():
  125. if segment['p_type'] == 'PT_LOAD':
  126. for section in self.elf_obj.iter_sections():
  127. if not segment.section_in_segment(section):
  128. continue
  129. result.append(
  130. SectionAddressRange(
  131. section.name, section['sh_addr'], section['sh_size']
  132. )
  133. )
  134. target = os.environ.get('IDF_TARGET')
  135. if target in TARGET_SECTIONS:
  136. result += TARGET_SECTIONS[target]
  137. return result
  138. def symbols_by_name(self, name): # type: (str) -> List[Symbol]
  139. res = []
  140. for sym in self.symbols:
  141. if sym.name == name:
  142. res.append(sym)
  143. return res
  144. def section_for_addr(self, sym_addr): # type: (int) -> Optional[str]
  145. for sar in self.section_ranges:
  146. if sar.contains_address(sym_addr):
  147. return sar.name
  148. return None
  149. def load_rtl_file(rtl_filename, tu_filename, functions): # type: (str, str, List[RtlFunction]) -> None
  150. last_function = None # type: Optional[RtlFunction]
  151. for line in open(rtl_filename):
  152. # Find function definition
  153. match = re.match(FUNCTION_REGEX, line)
  154. if match:
  155. function_name = match.group('function')
  156. last_function = RtlFunction(function_name, rtl_filename, tu_filename)
  157. functions.append(last_function)
  158. continue
  159. if last_function:
  160. # Find direct function calls
  161. match = re.match(CALL_REGEX, line)
  162. if match:
  163. target = match.group('target')
  164. if target not in last_function.calls:
  165. last_function.calls.append(target)
  166. continue
  167. # Find symbol references
  168. match = re.match(SYMBOL_REF_REGEX, line)
  169. if match:
  170. target = match.group('target')
  171. if target not in last_function.refs:
  172. last_function.refs.append(target)
  173. continue
  174. def rtl_filename_matches_sym_filename(rtl_filename, symbol_filename): # type: (str, str) -> bool
  175. # Symbol file names (from ELF debug info) are short source file names, without path: "cpu_start.c".
  176. # RTL file names are paths relative to the build directory, e.g.:
  177. # "build/esp-idf/esp_system/CMakeFiles/__idf_esp_system.dir/port/cpu_start.c.234r.expand"
  178. #
  179. # The check below may give a false positive if there are two files with the same name in
  180. # different directories. This doesn't seem to happen in IDF now, but if it does happen,
  181. # an assert in find_symbol_by_rtl_func should catch this.
  182. #
  183. # If this becomes and issue, consider also loading the .map file and using it to figure out
  184. # which object file was used as the source of each symbol. Names of the object files and RTL files
  185. # should be much easier to match.
  186. return os.path.basename(rtl_filename).startswith(symbol_filename)
  187. class SymbolNotFound(RuntimeError):
  188. pass
  189. def find_symbol_by_name(name, elfinfo, local_func_matcher): # type: (str, ElfInfo, Callable[[Symbol], bool]) -> Optional[Symbol]
  190. """
  191. Find an ELF symbol for the given name.
  192. local_func_matcher is a callback function which checks is the candidate local symbol is suitable.
  193. """
  194. syms = elfinfo.symbols_by_name(name)
  195. if not syms:
  196. return None
  197. if len(syms) == 1:
  198. return syms[0]
  199. else:
  200. # There are multiple symbols with a given name. Find the best fit.
  201. local_candidate = None
  202. global_candidate = None
  203. for sym in syms:
  204. if not sym.local:
  205. assert not global_candidate # can't have two global symbols with the same name
  206. global_candidate = sym
  207. elif local_func_matcher(sym):
  208. assert not local_candidate # can't have two symbols with the same name in a single file
  209. local_candidate = sym
  210. # If two symbols with the same name are defined, a global and a local one,
  211. # prefer the local symbol as the reference target.
  212. return local_candidate or global_candidate
  213. def match_local_source_func(rtl_filename, sym): # type: (str, Symbol) -> bool
  214. """
  215. Helper for match_rtl_funcs_to_symbols, checks if local symbol sym is a good candidate for the
  216. reference source (caller), based on the RTL file name.
  217. """
  218. assert sym.filename # should be set for local functions
  219. return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)
  220. def match_local_target_func(rtl_filename, sym_from, sym): # type: (str, Symbol, Symbol) -> bool
  221. """
  222. Helper for match_rtl_funcs_to_symbols, checks if local symbol sym is a good candidate for the
  223. reference target (callee or referenced data), based on RTL filename of the source symbol
  224. and the source symbol itself.
  225. """
  226. assert sym.filename # should be set for local functions
  227. if sym_from.local:
  228. # local symbol referencing another local symbol
  229. return sym_from.filename == sym.filename
  230. else:
  231. # global symbol referencing a local symbol;
  232. # source filename is not known, use RTL filename as a hint
  233. return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)
  234. def match_rtl_funcs_to_symbols(rtl_functions, elfinfo): # type: (List[RtlFunction], ElfInfo) -> Tuple[List[Symbol], List[Reference]]
  235. symbols = [] # type: List[Symbol]
  236. refs = [] # type: List[Reference]
  237. # General idea:
  238. # - iterate over RTL functions.
  239. # - for each RTL function, find the corresponding symbol
  240. # - iterate over the functions and variables referenced from this RTL function
  241. # - find symbols corresponding to the references
  242. # - record every pair (sym_from, sym_to) as a Reference object
  243. for source_rtl_func in rtl_functions:
  244. maybe_sym_from = find_symbol_by_name(source_rtl_func.name, elfinfo, partial(match_local_source_func, source_rtl_func.rtl_filename))
  245. if maybe_sym_from is None:
  246. # RTL references a symbol, but the symbol is not defined in the generated object file.
  247. # This means that the symbol was likely removed (or not included) at link time.
  248. # There is nothing we can do to check section placement in this case.
  249. continue
  250. sym_from = maybe_sym_from
  251. if sym_from not in symbols:
  252. symbols.append(sym_from)
  253. for target_rtl_func_name in source_rtl_func.calls + source_rtl_func.refs:
  254. if '*.LC' in target_rtl_func_name: # skip local labels
  255. continue
  256. maybe_sym_to = find_symbol_by_name(target_rtl_func_name, elfinfo, partial(match_local_target_func, source_rtl_func.rtl_filename, sym_from))
  257. if not maybe_sym_to:
  258. # This may happen for a extern reference in the RTL file, if the reference was later removed
  259. # by one of the optimization passes, and the external definition got garbage-collected.
  260. # TODO: consider adding some sanity check that we are here not because of some bug in
  261. # find_symbol_by_name?..
  262. continue
  263. sym_to = maybe_sym_to
  264. sym_from.refers_to.append(sym_to)
  265. sym_to.referred_from.append(sym_from)
  266. refs.append(Reference(sym_from, sym_to))
  267. if sym_to not in symbols:
  268. symbols.append(sym_to)
  269. return symbols, refs
  270. def get_symbols_and_refs(rtl_list, elf_file): # type: (List[str], BinaryIO) -> Tuple[List[Symbol], List[Reference]]
  271. elfinfo = ElfInfo(elf_file)
  272. rtl_functions = [] # type: List[RtlFunction]
  273. for file_name in rtl_list:
  274. load_rtl_file(file_name, file_name, rtl_functions)
  275. return match_rtl_funcs_to_symbols(rtl_functions, elfinfo)
  276. def list_refs_from_to_sections(refs, from_sections, to_sections): # type: (List[Reference], List[str], List[str]) -> int
  277. found = 0
  278. for ref in refs:
  279. if (not from_sections or ref.from_sym.section in from_sections) and \
  280. (not to_sections or ref.to_sym.section in to_sections):
  281. print(str(ref))
  282. found += 1
  283. return found
  284. def find_files_recursive(root_path, ext): # type: (str, str) -> Generator[str, None, None]
  285. for root, _, files in os.walk(root_path):
  286. for basename in files:
  287. if basename.endswith(ext):
  288. filename = os.path.join(root, basename)
  289. yield filename
  290. def main():
  291. parser = argparse.ArgumentParser()
  292. parser.add_argument(
  293. '--rtl-list',
  294. help='File with the list of RTL files',
  295. type=argparse.FileType('r'),
  296. )
  297. parser.add_argument(
  298. '--rtl-dir', help='Directory where to look for RTL files, recursively'
  299. )
  300. parser.add_argument(
  301. '--elf-file',
  302. required=True,
  303. help='Program ELF file',
  304. type=argparse.FileType('rb'),
  305. )
  306. action_sub = parser.add_subparsers(dest='action')
  307. find_refs_parser = action_sub.add_parser(
  308. 'find-refs',
  309. help='List the references coming from a given list of source sections'
  310. 'to a given list of target sections.',
  311. )
  312. find_refs_parser.add_argument(
  313. '--from-sections', help='comma-separated list of source sections'
  314. )
  315. find_refs_parser.add_argument(
  316. '--to-sections', help='comma-separated list of target sections'
  317. )
  318. find_refs_parser.add_argument(
  319. '--exit-code',
  320. action='store_true',
  321. help='If set, exits with non-zero code when any references found',
  322. )
  323. action_sub.add_parser(
  324. 'all-refs',
  325. help='Print the list of all references',
  326. )
  327. parser.parse_args()
  328. args = parser.parse_args()
  329. if args.rtl_list:
  330. with open(args.rtl_list, 'r') as rtl_list_file:
  331. rtl_list = [line.strip for line in rtl_list_file]
  332. else:
  333. if not args.rtl_dir:
  334. raise RuntimeError('Either --rtl-list or --rtl-dir must be specified')
  335. rtl_list = list(find_files_recursive(args.rtl_dir, '.expand'))
  336. if not rtl_list:
  337. raise RuntimeError('No RTL files specified')
  338. _, refs = get_symbols_and_refs(rtl_list, args.elf_file)
  339. if args.action == 'find-refs':
  340. from_sections = args.from_sections.split(',') if args.from_sections else []
  341. to_sections = args.to_sections.split(',') if args.to_sections else []
  342. found = list_refs_from_to_sections(
  343. refs, from_sections, to_sections
  344. )
  345. if args.exit_code and found:
  346. raise SystemExit(1)
  347. elif args.action == 'all-refs':
  348. for r in refs:
  349. print(str(r))
  350. if __name__ == '__main__':
  351. main()