| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403 |
- #!/usr/bin/env python
- #
- # Based on cally.py (https://github.com/chaudron/cally/), Copyright 2018, Eelco Chaudron
- # SPDX-FileCopyrightText: 2020-2022 Espressif Systems (Shanghai) CO LTD
- # SPDX-License-Identifier: Apache-2.0
- import argparse
- import os
- import re
- from functools import partial
- from typing import BinaryIO, Callable, Dict, Generator, List, Optional, Tuple
- import elftools
- from elftools.elf import elffile
# Matches the header line that introduces a function in an RTL dump file,
# i.e. a line of the form ";; Function <mangle> (<function>, ...)".
# The "function" group (first whitespace-free token inside the parentheses) is the
# name that load_rtl_file records for the function.
FUNCTION_REGEX = re.compile(
    r'^;; Function (?P<mangle>.*)\s+\((?P<function>\S+)(,.*)?\).*$'
)
# Matches a line containing "(call" followed by a double-quoted name;
# the "target" group captures the quoted text.
CALL_REGEX = re.compile(r'^.*\(call.*"(?P<target>.*)".*$')
# Matches a line containing "(symbol_ref" followed (without intervening parentheses)
# by a parenthesized, double-quoted name; the "target" group captures the quoted text.
SYMBOL_REF_REGEX = re.compile(r'^.*\(symbol_ref[^()]*\("(?P<target>.*)"\).*$')
class RtlFunction(object):
    """
    Record describing one function found in an RTL dump file.

    Stores the function name, the RTL file it came from, the translation unit
    file name, and two initially empty lists for the names of called functions
    and of referenced symbols.
    """

    def __init__(self, name: str, rtl_filename: str, tu_filename: str) -> None:
        self.name, self.rtl_filename, self.tu_filename = name, rtl_filename, tu_filename
        # Names of call targets; starts empty, a fresh list per instance
        self.calls: List[str] = []
        # Names of referenced symbols; starts empty, a fresh list per instance
        self.refs: List[str] = []
        # Initialised to None here; nothing in the visible code assigns it later
        self.sym = None
class SectionAddressRange(object):
    """
    Named address range covering [addr, addr + size).

    The lower bound is inclusive and the upper bound is exclusive.
    """

    def __init__(self, name: str, addr: int, size: int) -> None:
        self.name = name
        self.low, self.high = addr, addr + size

    def __str__(self) -> str:
        """Return "<name>: 0x<low> - 0x<high>" with zero-padded 8-digit hex bounds."""
        return f'{self.name}: 0x{self.low:08x} - 0x{self.high:08x}'

    def contains_address(self, addr: int) -> bool:
        """Return True if addr lies inside the range (low inclusive, high exclusive)."""
        return addr >= self.low and addr < self.high
# Additional address ranges, keyed by chip target name.
# ElfInfo._load_sections appends the list for the target named by the IDF_TARGET
# environment variable to the ranges read from the ELF file.
# NOTE(review): the '.rom.*' names suggest these are mask-ROM code/data regions that are
# not part of the application ELF - confirm the addresses against the chip documentation.
TARGET_SECTIONS: Dict[str, List[SectionAddressRange]] = {
    'esp32': [
        SectionAddressRange('.rom.text', 0x40000000, 0x70000),
        SectionAddressRange('.rom.rodata', 0x3ff96000, 0x9018)
    ],
    'esp32s2': [
        SectionAddressRange('.rom.text', 0x40000000, 0x1bed0),
        SectionAddressRange('.rom.rodata', 0x3ffac600, 0x392c)
    ],
    'esp32s3': [
        SectionAddressRange('.rom.text', 0x40000000, 0x568d0),
        SectionAddressRange('.rom.rodata', 0x3ff071c0, 0x8e30)
    ]
}
class Symbol(object):
    """
    A named symbol with its address, binding and location.

    Attributes:
        name: symbol name.
        addr: symbol address.
        local: True if the symbol is local.
        filename: file name associated with the symbol, or None.
        section: name of the section containing the symbol, or None if unknown.
        refers_to: symbols this symbol refers to (starts empty).
        referred_from: symbols that refer to this symbol (starts empty).
    """

    def __init__(self, name: str, addr: int, local: bool, filename: Optional[str], section: Optional[str]) -> None:
        self.name, self.addr, self.local = name, addr, local
        self.filename, self.section = filename, section
        self.refers_to: List[Symbol] = []
        self.referred_from: List[Symbol] = []

    def __str__(self) -> str:
        """Return "<name> @0x<addr> [<section>] (local) <filename>"; " (local)" only for local symbols."""
        section_label = self.section if self.section else 'unknown'
        local_tag = ' (local)' if self.local else ''
        return f'{self.name} @0x{self.addr:08x} [{section_label}]{local_tag} {self.filename}'
class Reference(object):
    """A directed reference from one symbol (from_sym) to another (to_sym)."""

    def __init__(self, from_sym: Symbol, to_sym: Symbol) -> None:
        self.from_sym, self.to_sym = from_sym, to_sym

    def __str__(self) -> str:
        """Return "<from> @0x<addr> (<section>) -> <to> @0x<addr> (<section>)"."""
        src, dst = self.from_sym, self.to_sym
        return (
            f'{src.name} @0x{src.addr:08x} ({src.section})'
            f' -> {dst.name} @0x{dst.addr:08x} ({dst.section})'
        )
class ElfInfo(object):
    """
    Section address ranges and symbols read from an ELF file.

    The file is parsed with pyelftools when the object is constructed; the results
    are available as the `section_ranges` and `symbols` attributes.
    """

    def __init__(self, elf_file: BinaryIO) -> None:
        self.elf_file = elf_file
        self.elf_obj = elffile.ELFFile(self.elf_file)
        # Sections are loaded before symbols: _load_symbols looks up each
        # symbol's section through section_for_addr, which reads section_ranges.
        self.section_ranges = self._load_sections()
        self.symbols = self._load_symbols()

    def _load_symbols(self) -> List[Symbol]:
        """Collect STT_NOTYPE, STT_FUNC and STT_OBJECT entries from every symbol table section."""
        wanted_types = ('STT_NOTYPE', 'STT_FUNC', 'STT_OBJECT')
        loaded = []
        for section in self.elf_obj.iter_sections():
            if isinstance(section, elftools.elf.sections.SymbolTableSection):
                # Name of the most recent STT_FILE entry seen in this symbol table
                current_file = None
                for elf_sym in section.iter_symbols():
                    info = elf_sym.entry['st_info']
                    kind = info['type']
                    if kind == 'STT_FILE':
                        current_file = elf_sym.name
                    elif kind in wanted_types:
                        is_local = info['bind'] == 'STB_LOCAL'
                        address = elf_sym.entry['st_value']
                        # The file name is recorded for local symbols only
                        owner_file = current_file if is_local else None
                        loaded.append(
                            Symbol(elf_sym.name, address, is_local, owner_file, self.section_for_addr(address))
                        )
        return loaded

    def _load_sections(self) -> List[SectionAddressRange]:
        """
        Build the list of address ranges: every section contained in a PT_LOAD segment,
        followed by the TARGET_SECTIONS entries for the target named by IDF_TARGET, if any.
        """
        ranges = []
        for segment in self.elf_obj.iter_segments():
            if segment['p_type'] != 'PT_LOAD':
                continue
            ranges.extend(
                SectionAddressRange(sec.name, sec['sh_addr'], sec['sh_size'])
                for sec in self.elf_obj.iter_sections()
                if segment.section_in_segment(sec)
            )
        idf_target = os.environ.get('IDF_TARGET')
        if idf_target in TARGET_SECTIONS:
            ranges.extend(TARGET_SECTIONS[idf_target])
        return ranges

    def symbols_by_name(self, name: str) -> List['Symbol']:
        """Return all loaded symbols whose name equals `name`, in load order."""
        return [candidate for candidate in self.symbols if candidate.name == name]

    def section_for_addr(self, sym_addr: int) -> Optional[str]:
        """Return the name of the first range containing sym_addr, or None if there is none."""
        matching_names = (rng.name for rng in self.section_ranges if rng.contains_address(sym_addr))
        return next(matching_names, None)
def load_rtl_file(rtl_filename: str, tu_filename: str, functions: List[RtlFunction]) -> None:
    """
    Parse an RTL dump file and append one RtlFunction per function definition to `functions`.

    Lines following a function header are scanned for direct calls and symbol
    references; each distinct target name is recorded once, in order of first
    appearance, on the most recently seen function. Lines before the first
    function header are ignored.

    :param rtl_filename: path of the RTL dump file to read
    :param tu_filename: translation unit file name stored on each created RtlFunction
    :param functions: output list, extended in place
    """
    last_function: Optional[RtlFunction] = None
    # Use a context manager so the file handle is closed deterministically,
    # even if parsing raises part-way through.
    with open(rtl_filename) as rtl_file:
        for line in rtl_file:
            # Find function definition
            match = FUNCTION_REGEX.match(line)
            if match:
                last_function = RtlFunction(match.group('function'), rtl_filename, tu_filename)
                functions.append(last_function)
                continue

            if last_function is None:
                # Nothing to attach calls/references to yet
                continue

            # Find direct function calls
            match = CALL_REGEX.match(line)
            if match:
                target = match.group('target')
                if target not in last_function.calls:
                    last_function.calls.append(target)
                continue

            # Find symbol references
            match = SYMBOL_REF_REGEX.match(line)
            if match:
                target = match.group('target')
                if target not in last_function.refs:
                    last_function.refs.append(target)
                continue
def rtl_filename_matches_sym_filename(rtl_filename: str, symbol_filename: str) -> bool:
    """
    Check whether an RTL file corresponds to the source file named by an ELF symbol.

    Symbol file names (from ELF debug info) are short source file names, without path,
    e.g. "cpu_start.c". RTL file names are paths relative to the build directory, e.g.
    "build/esp-idf/esp_system/CMakeFiles/__idf_esp_system.dir/port/cpu_start.c.234r.expand".
    The two match when the last path component of the RTL file name starts with the
    symbol file name.

    This may give a false positive if there are two files with the same name in
    different directories. This doesn't seem to happen in IDF now, but if it does happen,
    an assert in the symbol lookup should catch it.

    If this becomes an issue, consider also loading the .map file and using it to figure out
    which object file was used as the source of each symbol. Names of the object files and
    RTL files should be much easier to match.
    """
    rtl_basename = os.path.basename(rtl_filename)
    return rtl_basename.startswith(symbol_filename)
class SymbolNotFound(RuntimeError):
    """RuntimeError subclass named for failed symbol lookups; not raised by the code visible in this file."""
def find_symbol_by_name(name: str, elfinfo: ElfInfo, local_func_matcher: Callable[[Symbol], bool]) -> Optional[Symbol]:
    """
    Look up the ELF symbol called `name`.

    Returns None when no symbol has that name, and the symbol itself when exactly
    one does. When several symbols share the name, local_func_matcher is called on
    each local candidate to decide whether it is suitable; a suitable local symbol
    wins over the global one.
    """
    candidates = elfinfo.symbols_by_name(name)
    if len(candidates) <= 1:
        # Zero or one candidate: nothing to disambiguate
        return candidates[0] if candidates else None

    # There are multiple symbols with the given name. Find the best fit.
    best_global = None
    best_local = None
    for candidate in candidates:
        if candidate.local:
            if local_func_matcher(candidate):
                assert not best_local  # can't have two symbols with the same name in a single file
                best_local = candidate
        else:
            assert not best_global  # can't have two global symbols with the same name
            best_global = candidate

    # If both a global and a matching local symbol exist,
    # prefer the local symbol as the reference target.
    return best_local if best_local else best_global
def match_local_source_func(rtl_filename: str, sym: Symbol) -> bool:
    """
    Decide whether local symbol `sym` can be the source (caller) of references
    found in the RTL file `rtl_filename`, by comparing file names.
    Used as the local symbol matcher in match_rtl_funcs_to_symbols.
    """
    sym_file = sym.filename
    assert sym_file  # should be set for local functions
    return rtl_filename_matches_sym_filename(rtl_filename, sym_file)
def match_local_target_func(rtl_filename: str, sym_from: Symbol, sym: Symbol) -> bool:
    """
    Decide whether local symbol `sym` can be the target (callee or referenced data)
    of a reference made by `sym_from`, whose RTL file is `rtl_filename`.
    Used as the local symbol matcher in match_rtl_funcs_to_symbols.
    """
    assert sym.filename  # should be set for local functions
    if not sym_from.local:
        # Global symbol referencing a local symbol: the source file name is
        # not known, so the RTL file name is used as a hint.
        return rtl_filename_matches_sym_filename(rtl_filename, sym.filename)
    # Local symbol referencing another local symbol: file names must be equal.
    return sym_from.filename == sym.filename
def match_rtl_funcs_to_symbols(rtl_functions: List[RtlFunction], elfinfo: ElfInfo) -> Tuple[List[Symbol], List[Reference]]:
    """
    Resolve RTL functions and the names they call/reference to ELF symbols.

    For each RTL function the corresponding symbol is looked up; then every call
    target and symbol reference of that function is looked up as well. Each resolved
    (source, target) pair is recorded as a Reference and in the refers_to /
    referred_from lists of the two symbols.

    Returns a tuple of (symbols involved, without duplicates; all references found).
    """
    found_symbols: List[Symbol] = []
    found_refs: List[Reference] = []

    def remember(sym: Symbol) -> None:
        # Keep each symbol once, in order of first appearance
        if sym not in found_symbols:
            found_symbols.append(sym)

    for rtl_func in rtl_functions:
        source_matcher = partial(match_local_source_func, rtl_func.rtl_filename)
        sym_from = find_symbol_by_name(rtl_func.name, elfinfo, source_matcher)
        if sym_from is None:
            # The RTL file defines a function, but no such symbol exists in the ELF file.
            # This means that the symbol was likely removed (or not included) at link time.
            # There is nothing we can do to check section placement in this case.
            continue
        remember(sym_from)

        target_matcher = partial(match_local_target_func, rtl_func.rtl_filename, sym_from)
        for target_name in rtl_func.calls + rtl_func.refs:
            if '*.LC' in target_name:  # skip local labels
                continue
            sym_to = find_symbol_by_name(target_name, elfinfo, target_matcher)
            if sym_to is None:
                # This may happen for an extern reference in the RTL file, if the reference was
                # later removed by one of the optimization passes, and the external definition
                # got garbage-collected.
                # TODO: consider adding some sanity check that we are here not because of some
                # bug in find_symbol_by_name?..
                continue
            sym_from.refers_to.append(sym_to)
            sym_to.referred_from.append(sym_from)
            found_refs.append(Reference(sym_from, sym_to))
            remember(sym_to)

    return found_symbols, found_refs
def get_symbols_and_refs(rtl_list: List[str], elf_file: BinaryIO) -> Tuple[List[Symbol], List[Reference]]:
    """
    Load the ELF file and every RTL file in rtl_list, then match RTL functions to ELF symbols.

    Each RTL file name is passed to load_rtl_file as both the RTL file name and the
    translation unit file name. Returns the (symbols, references) tuple produced by
    match_rtl_funcs_to_symbols.
    """
    elf_info = ElfInfo(elf_file)
    parsed_functions: List[RtlFunction] = []
    for rtl_path in rtl_list:
        load_rtl_file(rtl_path, rtl_path, parsed_functions)
    return match_rtl_funcs_to_symbols(parsed_functions, elf_info)
def list_refs_from_to_sections(refs: List[Reference], from_sections: List[str], to_sections: List[str]) -> int:
    """
    Print every reference going from one of from_sections to one of to_sections.

    An empty section list places no restriction on that side of the reference.
    Returns the number of references printed.
    """
    printed = 0
    for ref in refs:
        if from_sections and ref.from_sym.section not in from_sections:
            continue
        if to_sections and ref.to_sym.section not in to_sections:
            continue
        print(str(ref))
        printed += 1
    return printed
def find_files_recursive(root_path: str, ext: str) -> Generator[str, None, None]:
    """
    Walk root_path recursively and yield the path of every file whose name ends with ext.

    Paths are built by joining the directory reported by os.walk with the file name.
    """
    for dirpath, _subdirs, filenames in os.walk(root_path):
        yield from (
            os.path.join(dirpath, entry)
            for entry in filenames
            if entry.endswith(ext)
        )
def main() -> None:
    """
    Command-line entry point.

    Reads the list of RTL files (from --rtl-list, or by searching --rtl-dir recursively
    for '.expand' files), matches them against the symbols of --elf-file, and then runs
    the selected action:

    - find-refs: print references between the given source and target sections;
      with --exit-code, exit with status 1 if any were found.
    - all-refs: print every reference.

    If no action is given, the inputs are still loaded but nothing is printed.

    Raises RuntimeError if neither --rtl-list nor --rtl-dir is given, or if no RTL
    files were found.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--rtl-list',
        help='File with the list of RTL files',
        type=argparse.FileType('r'),
    )
    parser.add_argument(
        '--rtl-dir', help='Directory where to look for RTL files, recursively'
    )
    parser.add_argument(
        '--elf-file',
        required=True,
        help='Program ELF file',
        type=argparse.FileType('rb'),
    )

    action_sub = parser.add_subparsers(dest='action')
    find_refs_parser = action_sub.add_parser(
        'find-refs',
        # Trailing space keeps the two concatenated literals from running together
        help='List the references coming from a given list of source sections '
             'to a given list of target sections.',
    )
    find_refs_parser.add_argument(
        '--from-sections', help='comma-separated list of source sections'
    )
    find_refs_parser.add_argument(
        '--to-sections', help='comma-separated list of target sections'
    )
    find_refs_parser.add_argument(
        '--exit-code',
        action='store_true',
        help='If set, exits with non-zero code when any references found',
    )
    action_sub.add_parser(
        'all-refs',
        help='Print the list of all references',
    )

    args = parser.parse_args()

    # argparse.FileType has already opened the ELF file; make sure it gets closed
    # on every exit path, including the SystemExit raised below.
    with args.elf_file as elf_file:
        if args.rtl_list:
            # args.rtl_list is an open text file (argparse.FileType('r')), not a path:
            # read it directly instead of passing it to open(), which raises TypeError.
            with args.rtl_list as rtl_list_file:
                stripped_lines = (line.strip() for line in rtl_list_file)
                # Skip blank lines so they are not treated as (empty) file names
                rtl_list = [rtl_name for rtl_name in stripped_lines if rtl_name]
        else:
            if not args.rtl_dir:
                raise RuntimeError('Either --rtl-list or --rtl-dir must be specified')
            rtl_list = list(find_files_recursive(args.rtl_dir, '.expand'))

        if not rtl_list:
            raise RuntimeError('No RTL files specified')

        _, refs = get_symbols_and_refs(rtl_list, elf_file)

    if args.action == 'find-refs':
        from_sections = args.from_sections.split(',') if args.from_sections else []
        to_sections = args.to_sections.split(',') if args.to_sections else []
        found = list_refs_from_to_sections(
            refs, from_sections, to_sections
        )
        if args.exit_code and found:
            raise SystemExit(1)
    elif args.action == 'all-refs':
        for r in refs:
            print(str(r))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|