| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334 |
- #
- # Copyright (c) 2021 Project CHIP Authors
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- """Collect information from various sources into Memory Map DataFrames."""
- import bisect
- from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple
- import memdf.collector.bloaty
- import memdf.collector.csv
- import memdf.collector.elftools
- import memdf.collector.readelf
- import memdf.collector.su
- import memdf.name
- import memdf.select
- import memdf.util.config
- import pandas as pd # type: ignore
- from elftools.elf.constants import SH_FLAGS # type: ignore
- from memdf import DF, Config, ConfigDescription, DFs, ExtentDF, SectionDF, SymbolDF
- from memdf.collector.util import simplify_source
# Options shared by collectors that report source file names: allows one or
# more leading path prefixes (e.g. a build root) to be stripped from them.
PREFIX_CONFIG: ConfigDescription = {
    'collect.prefix': {
        'help': 'Strip PATH from the beginning of source file names',
        'metavar': 'PATH',
        'default': [],
        'argparse': {
            'alias': ['--prefix', '--strip-prefix'],
            'action': 'append',
        }
    },
}

# Combined configuration description for collection: merges the per-method
# collector CONFIGs with the method selector and the prefix options above.
CONFIG: ConfigDescription = {
    Config.group_def('input'): {
        'title': 'input options',
    },
    Config.group_def('tool'): {
        'title': 'external tool options',
    },
    Config.group_map('collect'): {
        'group': 'input'
    },
    **memdf.collector.bloaty.CONFIG,
    **memdf.collector.csv.CONFIG,
    **memdf.collector.elftools.CONFIG,
    **memdf.collector.readelf.CONFIG,
    # NOTE: 'su' is an accepted method below but memdf.collector.su.CONFIG is
    # not merged here — presumably it declares no options; confirm.
    'collect.method': {
        'help':
        'Method of input processing: one of'
        ' elftools, readelf, bloaty, csv, tsv, su.',
        'metavar': 'METHOD',
        'choices': ['elftools', 'readelf', 'bloaty', 'csv', 'tsv', 'su'],
        'default': 'elftools',
        'argparse': {
            'alias': ['-f'],
        },
    },
    **PREFIX_CONFIG,
}

# ARM ELF mapping symbols: '$a' marks ARM code, '$t'/'$t.x' Thumb code,
# '$d'/'$d.realdata' literal data. They mark mode ranges rather than naming
# real program symbols, so postprocess_symbols() strips them out.
ARM_SPECIAL_SYMBOLS = frozenset(["$a", "$t", "$t.x", "$d", "$d.realdata"])
def postprocess_symbols(config: Config, symbols: SymbolDF) -> SymbolDF:
    """Postprocess a symbol table after collecting from one source.

    If the symbol table contains FILE symbols, they are removed and
    replaced by a 'file' column on the remaining symbols.

    If the symbol table contains ARM mapping symbols ($a/$t/$d …), they
    are removed and replaced by an 'arm' column carrying the marker most
    recently seen before each symbol.
    """
    # NOTE(review): 'collect.prefix-file' is not declared in PREFIX_CONFIG in
    # this file — presumably declared elsewhere or defaulted by Config; confirm.
    if config['collect.prefix-file']:
        prefixes = config.get_re('collect.prefix')
    else:
        prefixes = None

    file_column: List[str] = []   # per-row originating file
    arm_column: List[str] = []    # per-row ARM mapping state
    arm_markers: Dict[str, bool] = {}  # distinct marker names seen (ordered)
    file_name = ''
    arm_mode = ''
    saw_file = False

    if 'type' in symbols.columns:
        for row in symbols.itertuples():
            if row.type == 'FILE':
                # A FILE symbol names the source file of the symbols after it.
                saw_file = True
                file_name = row.symbol
                if prefixes:
                    file_name = simplify_source(file_name, prefixes)
            elif (row.type == 'NOTYPE' and row.symbol.startswith('$')
                    and (arm_mode or row.symbol in ARM_SPECIAL_SYMBOLS)):
                # Mapping symbol: switches the mode for subsequent symbols.
                arm_mode = row.symbol
                arm_markers[arm_mode] = True
            file_column.append(file_name)
            arm_column.append(arm_mode)

    # Attach the new columns first (lengths match the unfiltered table),
    # then drop the marker rows themselves.
    if saw_file:
        symbols['file'] = file_column
    if arm_mode:
        symbols['arm'] = arm_column
    if saw_file:
        symbols = symbols[symbols['type'] != 'FILE']
    if arm_mode:
        symbols = symbols[~symbols.symbol.isin(arm_markers.keys())]
    return symbols
def postprocess_file(config: Config, dfs: DFs) -> None:
    """Postprocess tables after collecting from one source."""
    # Only the symbol table needs per-source postprocessing.
    key = SymbolDF.name
    if key in dfs:
        dfs[key] = postprocess_symbols(config, dfs[key])
def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
    """Account for space not used by any symbol, or by multiple symbols.

    Walks the symbols of each tracked section in address order and
    synthesizes rows for three kinds of extent:
      * 'gap'     - addresses between symbols accounted to no symbol,
      * 'unused'  - gaps adjacent to configured free-space marker symbols,
      * 'overlap' - addresses covered by more than one symbol.

    Returns a DFs mapping each kind to an ExtentDF of its rows, plus the
    symbol table with the synthetic rows merged in, sorted by address.
    """
    # These symbols mark the start or end of unused space.
    start_unused = frozenset(config.get('symbol.free.start', []))
    end_unused = frozenset(config.get('symbol.free.end', []))
    # Optional 'cu'/'input' columns are carried through only when present.
    extent_columns = ['address', 'size', 'section', 'file']
    need_cu = 'cu' in symbols.columns
    if need_cu:
        extent_columns.append('cu')
    need_input = 'input' in symbols.columns
    if need_input:
        extent_columns.append('input')
    columns = ['symbol', *extent_columns, 'type', 'bind']

    def filler(name, address, size, previous, current) -> List:
        """Build one synthetic symbol row in `columns` order, inheriting
        section/file/cu/input from a neighbor (preferring the preceding one).
        """
        row = [
            name,  # symbol
            address,  # address
            size,  # size
            (previous.section if previous else
             current.section if current else memdf.name.UNDEF),  # section
            (previous.file
             if previous else current.file if current else ''),  # file
        ]
        if need_cu:
            row.append(
                previous.cu if previous else current.cu if current else '')
        if need_input:
            row.append(previous.input if previous else current.
                       input if current else '')
        row.append('NOTYPE')  # type
        row.append('LOCAL')  # bind
        return row

    def fill_gap(previous, current, from_address,
                 to_address) -> Tuple[str, List]:
        """Add a row for a unaccounted gap or unused space."""
        size = to_address - from_address
        # A gap at the start of a section, or adjacent to a configured
        # free-space marker symbol, counts as 'unused' rather than 'gap'.
        if (previous is None or previous.symbol in start_unused
                or current.symbol in end_unused):
            use = 'unused'
            name = memdf.name.unused(from_address, size)
        else:
            use = 'gap'
            name = memdf.name.gap(from_address, size)
        return (use, filler(name, from_address, size, previous, current))

    def fill_overlap(previous, current, from_address,
                     to_address) -> Tuple[str, List]:
        """Add a row for overlap."""
        # Here to_address < from_address, so size is negative; the row
        # records the negative size and the name records the magnitude.
        size = to_address - from_address
        return ('overlap',
                filler(memdf.name.overlap(from_address, -size), from_address,
                       size, previous, current))

    # Find the address range for sections that are configured or allocated.
    config_sections = set()
    for _, s in config.get('region.sections', {}).items():
        config_sections |= set(s)
    section_to_range = {}   # section name -> range(start, end)
    start_to_section = {}   # section start address -> section name
    section_starts = [0]    # sorted starts, for bisect lookup below
    for s in sections.itertuples():
        if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)):
            section_to_range[s.section] = range(s.address, s.address + s.size)
            start_to_section[s.address] = s.section
            section_starts.append(s.address)
    section_starts.sort()
    new_symbols: Dict[str, List[list]] = {
        'gap': [],
        'unused': [],
        'overlap': []
    }
    section_range = None
    previous_symbol = None
    current_address = 0
    # Walk only real symbols within tracked sections, in address order.
    iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
                                   & (symbols.type != 'FILE')
                                   & symbols.section.isin(section_to_range)]
    iterable_symbols = iterable_symbols.sort_values(by='address')
    for symbol in iterable_symbols.itertuples():
        if not previous_symbol or symbol.section != previous_symbol.section:
            # We sometimes see symbols that have the value of their section end
            # address (so they are not actually within the section) and have
            # the same address as a symbol in the next section.
            symbol_address_section = start_to_section.get(section_starts[
                bisect.bisect_right(section_starts, symbol.address) - 1])
            if symbol_address_section != symbol.section:
                continue
            # Starting or switching sections.
            if previous_symbol and section_range:
                # previous_symbol is the last in its section.
                if current_address < section_range[-1] + 1:
                    use, row = fill_gap(previous_symbol, previous_symbol,
                                        current_address, section_range[-1] + 1)
                    new_symbols[use].append(row)
            # Start of section.
            previous_symbol = None
            section_range = section_to_range.get(symbol.section)
            if section_range:
                current_address = section_range[0]
        if section_range:
            if current_address < symbol.address:
                # Space between the previous symbol's end and this one.
                use, row = fill_gap(previous_symbol, symbol, current_address,
                                    symbol.address)
                new_symbols[use].append(row)
            elif current_address > symbol.address:
                # This symbol starts before the previous one ended.
                use, row = fill_overlap(previous_symbol, symbol,
                                        current_address, symbol.address)
                new_symbols[use].append(row)
            current_address = symbol.address + symbol.size
            previous_symbol = symbol
    # Wrap the accumulated rows as frames and merge them into the symbol
    # table; fillna('') normalizes optional columns on pre-existing rows.
    dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols}
    symbols = pd.concat([symbols, *dfs.values()]).fillna('')
    symbols.sort_values(by='address', inplace=True)
    for k in dfs:
        dfs[k] = ExtentDF(dfs[k][extent_columns])
        dfs[k].attrs['name'] = k
    dfs[SymbolDF.name] = SymbolDF(symbols)
    return dfs
def postprocess_collected(config: Config, dfs: DFs) -> None:
    """Postprocess tables after reading all sources."""
    # Prune tables according to configuration options. This happens before
    # fill_holes() so that the space of any pruned symbols will still be
    # accounted for, and to avoid unnecessary work for pruned sections.
    for frame_class in (SymbolDF, SectionDF):
        if frame_class.name in dfs:
            dfs[frame_class.name] = memdf.select.select_configured(
                config, dfs[frame_class.name],
                memdf.select.COLLECTED_CHOICES)

    # Account for space not used by any symbol, or by multiple symbols.
    if (SymbolDF.name in dfs and SectionDF.name in dfs
            and config.get('args.fill_holes', True)):
        dfs.update(
            fill_holes(config, dfs[SymbolDF.name], dfs[SectionDF.name]))

    # Create synthetic columns (e.g. 'region') and prune tables according
    # to their configuration. This happens after fill_holes() so that
    # synthetic column values are also created for the gap symbols.
    for frame_class in (SymbolDF, SectionDF):
        if frame_class.name not in dfs:
            continue
        for column in memdf.select.SYNTHETIC_CHOICES:
            dfs[frame_class.name] = memdf.select.synthesize_column(
                config, dfs[frame_class.name], column)
            dfs[frame_class.name] = memdf.select.select_configured_column(
                config, dfs[frame_class.name], column)

    # Tag columns for downstream presentation: which ones hold mangled
    # symbol names and which hold addresses to render in hex.
    for df in dfs.values():
        demangle = {c for c in df.columns if c.endswith('symbol')}
        if demangle:
            df.attrs['demangle'] = demangle
        hexify = {c for c in df.columns if c.endswith('address')}
        if hexify:
            df.attrs['hexify'] = hexify
# Signature shared by all collector entry points: (config, filename, method).
FileReader = Callable[[Config, str, str], DFs]

# Dispatch table from a 'collect.method' value to the reader implementing it.
# 'csv' and 'tsv' share one reader; 'su' maps to read_dir — presumably it
# reads a directory rather than a single file (confirm in memdf.collector.su).
FILE_READERS: Dict[str, FileReader] = {
    'bloaty': memdf.collector.bloaty.read_file,
    'elftools': memdf.collector.elftools.read_file,
    'readelf': memdf.collector.readelf.read_file,
    'csv': memdf.collector.csv.read_file,
    'tsv': memdf.collector.csv.read_file,
    'su': memdf.collector.su.read_dir,
}
def collect_files(config: Config,
                  files: Optional[List[str]] = None,
                  method: Optional[str] = None) -> DFs:
    """Read a filtered memory map from a set of files.

    Reads each input with the reader selected by `method` (falling back to
    the 'collect.method' configuration), postprocesses each source, then
    concatenates the per-source tables and postprocesses the combined set.
    """
    inputs = files if files else config.get('args.inputs', [])
    if method is None:
        method = config.get('collect.method', 'csv')
    # Accumulate per-table lists of frames, one entry per input source.
    collected: Dict[str, List[DF]] = {}
    for filename in inputs:
        # Look up the reader inside the loop so an unknown method is only
        # an error when there is actually something to read.
        per_file: DFs = FILE_READERS[method](config, filename, method)
        postprocess_file(config, per_file)
        for key, frame in per_file.items():
            collected.setdefault(key, []).append(frame)
    dfs: DFs = {
        key: pd.concat(frames, ignore_index=True)
        for key, frames in collected.items()
    }
    postprocess_collected(config, dfs)
    return dfs
def parse_args(config_desc: Mapping, argv: Sequence[str]) -> Config:
    """Common argument parsing for collection tools.

    Merges the generic utility options, this module's collection options,
    and the caller's own description (later entries win), then adds the
    positional FILE arguments and parses `argv`.
    """
    description = {
        **memdf.util.config.CONFIG,
        **CONFIG,
        **config_desc,
    }
    config = Config().init(description)
    config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
    return config.parse(argv)
|