- #
- # Copyright (c) 2021 Project CHIP Authors
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- """Memory DataFrame output and related utilities."""
- import contextlib
- import io
- import json
- import pathlib
- import sys
- from typing import IO, Any, Callable, Dict, List, Mapping, Optional, Protocol, Sequence, Union
- import cxxfilt # type: ignore
- import memdf.df
- import memdf.select
- import memdf.util.pretty
- import pandas as pd # type: ignore
- from memdf import DF, Config, ConfigDescription, DFs
- from memdf.util.config import ParseSizeAction
# Config description for the -C/--demangle option: when enabled, reports
# demangle C++ symbol names (see `demangle()` below).
REPORT_DEMANGLE_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
    },
    'report.demangle': {
        'help': 'Demangle C++ symbol names',
        'default': False,
        'argparse': {
            'alias': ['--demangle', '-C'],
            'action': 'store_true',
        },
    },
}
# Config description for the --limit option: rows at or below this size
# are dropped from reports. The value is parsed with ParseSizeAction, so
# size suffixes (e.g. '4K') are accepted.
REPORT_LIMIT_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
    },
    'report.limit': {
        'help': 'Limit display to items above the given size',
        'metavar': 'BYTES',
        'default': 0,
        'argparse': {
            'alias': ['--limit'],
            'action': ParseSizeAction,
        },
    }
}
# Combined report options: demangling plus size-limit filtering.
REPORT_CONFIG: ConfigDescription = {
    **REPORT_DEMANGLE_CONFIG,
    **REPORT_LIMIT_CONFIG,
}
def postprocess_report_by(config: Config, key: str, info: Mapping) -> None:
    """Config postprocessor for --report-by.

    Reporting by region requires every section, so force
    `section.select-all` on in that case.

    Args:
        config: the configuration being postprocessed.
        key: the config key; must be 'report.by'.
        info: the config description entry (unused).
    """
    assert key == 'report.by'
    if config.get(key) == 'region':
        # Fixed: removed a stray trailing comma that made this statement a
        # pointless one-element tuple expression.
        config.put('section.select-all', True)
# Config description for the --by option choosing the reporting group
# (e.g. section, symbol, region); postprocessed by postprocess_report_by.
REPORT_BY_CONFIG: ConfigDescription = {
    'report.by': {
        'help': 'Reporting group',
        'metavar': 'GROUP',
        'choices': memdf.select.SELECTION_CHOICES,
        'default': 'section',
        'argparse': {
            'alias': ['--by'],
        },
        'postprocess': postprocess_report_by,
    },
}
def demangle(symbol: str) -> str:
    """Return the demangled form of a C++ symbol name.

    Names that cxxfilt cannot demangle are returned unchanged.
    """
    try:
        symbol = cxxfilt.demangle(symbol, external_only=False)
    except cxxfilt.InvalidName:
        pass
    return symbol
def hierify_rows(table: Sequence[Sequence[Any]]) -> List[List[Any]]:
    """Convert a table of rows to 'hierarchical' form.

    Scanning top to bottom, a string cell that equals the cell directly
    above it — with every cell to its left also unchanged — is replaced
    by '', so repeated leading key columns read as a hierarchy.

    Args:
        table: rows of equal length; leading key cells should be strings.
    Returns:
        A new list of rows with repeated leading string cells blanked.
    """
    if not table:
        # Fixed: return a list, per the declared return type, instead of
        # echoing back the caller's (possibly non-list) empty sequence.
        return []
    persist: Optional[List[Any]] = None  # previous row's effective values
    rows: List[List[Any]] = []
    for row in table:
        if persist is None:
            persist = [None] * len(row)
        new_persist = []
        new_row = []
        changed = False
        for old, new in zip(persist, list(row)):
            # Only blank string cells, and only while every cell to the
            # left has also matched the previous row.
            if not changed and isinstance(new, str) and new == old:
                new_row.append('')
                new_persist.append(old)
            else:
                changed = True
                new_row.append(new)
                new_persist.append(new)
        rows.append(new_row)
        persist = new_persist
    return rows


def hierify(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of `df` with repeated leading string cells blanked."""
    columns = list(df.columns)
    rows = hierify_rows(df.itertuples(index=False))
    r = pd.DataFrame(rows, columns=columns)
    # NOTE: attrs is shared with (not copied from) the source frame,
    # preserving the original behavior.
    r.attrs = df.attrs
    return r
# Output

# Config description for the -O/--output option naming the output file.
# A missing or '-' filename means stdout (see open_output()).
OUTPUT_FILE_CONFIG: ConfigDescription = {
    Config.group_def('output'): {
        'title': 'output options',
    },
    'output.file': {
        'help': 'Output file',
        'metavar': 'FILENAME',
        'default': None,
        'argparse': {
            'alias': ['--output', '-O'],
        },
    },
}
def postprocess_output_metadata(config: Config, key: str,
                                info: Mapping) -> None:
    """For --output-metadata=KEY:VALUE list, convert to dictionary."""
    assert key == 'output.metadata'
    metadata: Dict[str, Any] = {}
    for entry in config.get(key):
        if ':' in entry:
            # 'NAME:VALUE' entries map NAME to the (string) VALUE.
            name, _, value = entry.partition(':')
            metadata[name] = value
        else:
            # A bare NAME is treated as a boolean flag.
            metadata[entry] = True
    config.put(key, metadata)
OutputOption = Union[IO, str, None]


@contextlib.contextmanager
def open_output(config: Config,
                output: OutputOption = None,
                suffix: Optional[str] = None):
    """Context manager yielding a writable text stream.

    - If `output` is an open stream, yield it; the caller keeps ownership.
    - If `output` is a filename (or config['output.file'] names one), open
      it for writing, appending `suffix` if given, and close it on exit.
    - Otherwise (no filename, or '-'), yield sys.stdout.
    """
    if isinstance(output, io.IOBase):
        yield output
        return
    if isinstance(output, str):
        filename = output
    else:
        filename = config['output.file']
    if (not filename) or (filename == '-'):
        yield sys.stdout
        return
    if suffix:
        filename += suffix
    # Fixed: use `with` so the file is closed even if the caller's block
    # raises; the original leaked the handle on exceptions.
    with open(filename, 'w') as f:
        yield f
# Single-table writers.


def write_nothing(config: Config, df: DF, output: IO, **_kwargs) -> None:
    """Single-table writer that deliberately produces no output."""
    return None
def write_text(config: Config, df: DF, output: IO, **_kwargs) -> None:
    """Write a memory usage data frame as a human-readable table."""
    memdf.util.pretty.debug(df)
    if df.shape[0]:
        df = df.copy()
        last_column_is_left_justified = False
        formatters = []
        for column in df.columns:
            if pd.api.types.is_string_dtype(df.dtypes[column]):
                df[column] = df[column].astype(str)
                # Left justify strings, padded to the widest cell (or the
                # header, if that is wider).
                width = max(len(column), df[column].str.len().max())
                # Fixed: bind `width` per column via a default argument;
                # the original lambda closed over the loop variable, so
                # every string column was padded with the width computed
                # for the last string column.
                formatters.append(lambda x, w=width: x.ljust(w))
                if column == df.columns[-1]:
                    last_column_is_left_justified = True
            else:
                formatters.append(str)
        s = df.to_string(index=False, formatters=formatters, justify='left')
        if last_column_is_left_justified:
            # Strip trailing spaces.
            # Fixed: write to `output`; the original printed to stdout.
            for line in s.split('\n'):
                print(line.rstrip(), file=output)
        else:
            print(s, file=output)
    else:
        # No rows. `df.to_string()` doesn't look like a text table in this case.
        # Fixed: write to `output`; the original printed to stdout.
        print(' '.join(df.columns), file=output)
def write_json(_config: Config, df: DF, output: IO, **kwargs) -> None:
    """Write a memory usage data frame as json."""
    # Method names look like 'json_records'; the part after the prefix
    # names the pandas `orient`.
    orient = kwargs.get('method', 'records')
    prefix = 'json_'
    if orient.startswith(prefix):
        orient = orient[len(prefix):]
    df.to_json(output, orient=orient)
def write_csv(_config: Config, df: DF, output: IO, **kwargs) -> None:
    """Write a memory usage data frame in csv or tsv form."""
    # Forward only the keyword arguments that `DataFrame.to_csv` accepts.
    keywords = ('sep', 'na_rep', 'float_format', 'columns', 'header', 'index',
                'index_label', 'quoting', 'quotechar', 'line_terminator',
                'date_format', 'doublequote', 'escapechar', 'decimal')
    df.to_csv(output, **{k: v for k, v in kwargs.items() if k in keywords})
def write_markdown(_config: Config, df: DF, output: IO, **kwargs) -> None:
    """Write a memory usage data frame as markdown."""
    # Forward only the keyword arguments that `DataFrame.to_markdown`
    # (i.e. tabulate) accepts.
    keywords = ('index', 'headers', 'showindex', 'tablefmt', 'numalign',
                'stralign', 'disable_numparse', 'colalign', 'floatfmt')
    args = {k: v for k, v in kwargs.items() if k in keywords}
    # Default the table format to the output method name (e.g. 'pipe').
    args.setdefault('tablefmt', kwargs.get('method', 'pipe'))
    df.to_markdown(output, **args)
    print(file=output)
# Multi-table writers.


class DFsWriter(Protocol):
    """Type checking for multiple table writers."""

    # Signature shared by write_one/write_many/write_jsons/write_none:
    # `writer` is the single-table writer used for each frame.
    def __call__(self, config: Config, dfs: DFs, output: OutputOption,
                 writer: Callable, **kwargs) -> None:
        pass
# Counter used to make fallback data frame names unique.
dfname_count = 0


def dfname(df: DF, k: str = 'unknown') -> str:
    """Get a name for a data frame."""
    global dfname_count
    try:
        return df.name
    except AttributeError:
        pass
    cls = memdf.df.find_class(df)
    if cls:
        return cls.name
    # No name anywhere: synthesize a unique one.
    dfname_count += 1
    return f'{k}{dfname_count}'
def write_one(config: Config, frames: DFs, output: OutputOption,
              writer: Callable, **kw) -> None:
    """Write a group of memory usage data frames to a single file."""
    with open_output(config, output) as out:
        for index, frame in enumerate(frames.values()):
            if index:
                # Blank line between consecutive tables.
                print(file=out)
            if kw.get('title') and 'titlefmt' in kw and 'title' in frame.attrs:
                print(kw['titlefmt'].format(frame.attrs['title']), file=out)
            writer(config, frame, out, **kw)
def write_many(config: Config, frames: DFs, output: OutputOption,
               writer: Callable, **kwargs) -> None:
    """Write a group of memory usage data frames to multiple files."""
    suffix = kwargs.get('suffix')
    if suffix is None:
        # Infer a suffix: from the output filename if it has one,
        # otherwise from the output method, otherwise none.
        suffix = pathlib.Path(output).suffix if isinstance(output, str) else ''
        if not suffix:
            suffix = '.' + kwargs['method'] if 'method' in kwargs else ''
    for frame in frames.values():
        # Each frame goes to its own file, named after the frame.
        with open_output(config, output, f'-{dfname(frame)}{suffix}') as out:
            writer(config, frame, out, **kwargs)
def write_jsons(config: Config, frames: DFs, output: OutputOption,
                writer: Callable, **kwargs) -> None:
    """Write a group of memory usage data frames as a json dictionary."""
    with open_output(config, output) as out:
        print('{', file=out)
        # Optional --output-metadata entries become top-level keys.
        if metadata := config['output.metadata']:
            for k, v in metadata.items():
                print(f' {json.dumps(k)}: {json.dumps(v)},', file=out)
        print(' "frames": ', file=out, end='')
        # `sep` doubles as the opening brace of the "frames" object on the
        # first iteration and the comma separator on later ones.
        sep = '{'
        for df in frames.values():
            name = df.attrs.get('name', df.attrs.get('title', dfname(df)))
            print(sep, file=out)
            sep = ','
            print(f' {json.dumps(name)}: ', file=out, end='')
            writer(config, df, out, indent=6, **kwargs)
        # Closes the "frames" object and the outer object.
        # NOTE(review): if `frames` is empty, the opening '{' of "frames"
        # is never emitted and the output is invalid JSON — confirm callers
        # never pass an empty dict.
        print('}}', file=out)
def write_none(_config: Config, _frames: DFs, _output: OutputOption,
               _writer: Callable, **_kwargs) -> None:
    """Multi-table writer that deliberately produces no output."""
    return None
def kwgetset(k: str, *args):
    """Union the values stored under key `k` across the given mappings.

    Mappings without the key contribute nothing; the result is a set.
    """
    return set().union(*(mapping.get(k, set()) for mapping in args))
def prep(config: Config, df: pd.DataFrame, kw: Dict) -> pd.DataFrame:
    """Preprocess a table for output.

    As requested by `config`, `kw`, and the frame's attrs, applies C++
    name demangling, hex formatting of numeric columns, and hierification.
    The frame is copied before the first mutation; an untouched frame is
    returned as-is.
    """
    def each_column(k: str):
        # Columns named under key `k` by either the frame's attrs or the
        # caller's kwargs, restricted to columns actually present.
        # NOTE: closes over `df`, which is rebound by maybe_copy below;
        # copies keep the same columns/attrs, so iteration is unaffected.
        for column in set(df.attrs.get(k, set()) | kw.get(k, set())):
            if column in df.columns:
                yield column

    def maybe_copy(copied, df):
        # Copy-on-write: copy `df` exactly once, before the first mutation.
        return (True, df if copied else df.copy())

    copied = False
    if config['report.demangle']:
        for column in each_column('demangle'):
            copied, df = maybe_copy(copied, df)
            df[column] = df[column].apply(demangle)
    for column in each_column('hexify'):
        copied, df = maybe_copy(copied, df)
        # Uniform hex width, wide enough for the column's maximum value.
        width = (int(df[column].max()).bit_length() + 3) // 4
        df[column] = df[column].apply(
            lambda x: '{0:0{width}X}'.format(x, width=width))
    if kw.get('hierify'):
        df = hierify(df)
    return df
class Writer:
    """Bundle a multi-frame writer with its single-frame writer.

    `defaults` supply keyword arguments that callers may override;
    `overrides` supply keyword arguments that always win.
    """

    def __init__(self,
                 group: Callable,
                 single: Callable,
                 defaults: Optional[Dict] = None,
                 overrides: Optional[Dict] = None):
        self.group = group
        self.single = single
        self.defaults = defaults or {}
        self.overrides = overrides or {}

    def write_df(self,
                 config: Config,
                 frame: pd.DataFrame,
                 output: OutputOption = None,
                 **kwargs) -> None:
        """Write a single memory usage data frame."""
        merged = self._args(kwargs)
        with open_output(config, output) as out:
            self.single(config, prep(config, frame, merged), out, **merged)

    def write_dfs(self,
                  config: Config,
                  frames: DFs,
                  output: OutputOption = None,
                  **kwargs) -> None:
        """Write a group of memory usage data frames."""
        merged = self._args(kwargs)
        prepped = {k: prep(config, df, merged) for k, df in frames.items()}
        self.group(config, prepped, output, self.single, **merged)

    def _args(self, kw: Mapping) -> Dict:
        """Merge defaults, caller kwargs, and overrides; overrides win."""
        return {**self.defaults, **kw, **self.overrides}
class MarkdownWriter(Writer):
    """Writer for the tabulate-based formats: one file of markdown tables."""

    def __init__(self,
                 defaults: Optional[Dict] = None,
                 overrides: Optional[Dict] = None):
        # Tables omit the frame index unless the caller asks for it.
        super().__init__(write_one, write_markdown,
                         {'index': False, **(defaults or {})}, overrides)
class JsonWriter(Writer):
    """Writer emitting all frames into a single json dictionary."""

    def __init__(self,
                 defaults: Optional[Dict] = None,
                 overrides: Optional[Dict] = None):
        super().__init__(write_jsons, write_json, defaults, overrides)
        # Hierification blanks repeated cells, which would corrupt
        # structured json output, so it is always disabled.
        self.overrides['hierify'] = False
class CsvWriter(Writer):
    """Writer emitting one csv/tsv file per frame."""

    def __init__(self,
                 defaults: Optional[Dict] = None,
                 overrides: Optional[Dict] = None):
        # Rows omit the frame index unless the caller asks for it.
        super().__init__(write_many, write_csv,
                         {'index': False, **(defaults or {})}, overrides)
        # Hierification blanks repeated cells, which would corrupt
        # machine-readable csv output, so it is always disabled.
        self.overrides['hierify'] = False
# Registry of output formats, keyed by the --output-format value. Most
# entries are tabulate table formats handled by MarkdownWriter; the
# 'json_*' entries name the pandas `orient` used by write_json.
WRITERS: Dict[str, Writer] = {
    'none': Writer(write_none, write_nothing),
    'text': Writer(write_one, write_text, {'titlefmt': '\n{}\n'}),
    'json_split': JsonWriter(),
    'json_records': JsonWriter(),
    'json_index': JsonWriter(),
    'json_columns': JsonWriter(),
    'json_values': JsonWriter(),
    'json_table': JsonWriter(),
    'csv': CsvWriter({'sep': ','}),
    'tsv': CsvWriter({'sep': '\t'}),
    'plain': MarkdownWriter({'titlefmt': '\n{}\n'}),
    'simple': MarkdownWriter({'titlefmt': '\n{}\n'}),
    'grid': MarkdownWriter({'titlefmt': '\n\n'}),
    'fancy_grid': MarkdownWriter({'titlefmt': '\n\n'}),
    'html': MarkdownWriter({'titlefmt': '<h2></h2>'}),
    'unsafehtml': MarkdownWriter({'titlefmt': '<h2></h2>'}),
    'github': MarkdownWriter(),
    'pipe': MarkdownWriter(),
    'orgtbl': MarkdownWriter(),
    'jira': MarkdownWriter(),
    'presto': MarkdownWriter(),
    'pretty': MarkdownWriter(),
    'psql': MarkdownWriter(),
    'rst': MarkdownWriter(),
    'mediawiki': MarkdownWriter(),
    'moinmoin': MarkdownWriter(),
    'youtrack': MarkdownWriter(),
    'latex': MarkdownWriter(),
    'latex_raw': MarkdownWriter(),
    'latex_booktabs': MarkdownWriter(),
    'latex_longtable': MarkdownWriter(),
    'textile': MarkdownWriter(),
}
# Config description for the output format (-t/--to, one of WRITERS) and
# the json metadata entries (--metadata NAME:VALUE, postprocessed into a
# dict by postprocess_output_metadata).
OUTPUT_FORMAT_CONFIG: ConfigDescription = {
    Config.group_def('output'): {
        'title': 'output options',
    },
    'output.format': {
        'help': f'Output format: one of {", ".join(WRITERS)}.',
        'metavar': 'FORMAT',
        'default': 'simple',
        'choices': list(WRITERS.keys()),
        'argparse': {
            'alias': ['--to', '-t'],
        },
    },
    'output.metadata': {
        'help': 'Metadata for JSON',
        'metavar': 'NAME:VALUE',
        'default': [],
        'argparse': {
            'alias': ['--metadata']
        },
        'postprocess': postprocess_output_metadata,
    }
}
# Combined output options: output file plus format/metadata.
OUTPUT_CONFIG: ConfigDescription = {
    **OUTPUT_FILE_CONFIG,
    **OUTPUT_FORMAT_CONFIG,
}
def write_dfs(config: Config,
              frames: DFs,
              output: OutputOption = None,
              method: Optional[str] = None,
              **kwargs) -> None:
    """Write a group of memory usage data frames."""
    # Resolve the format (explicit argument wins over configuration) and
    # pass it along so writers can consult it (e.g. json orient).
    chosen = method or config['output.format']
    kwargs['method'] = chosen
    WRITERS[chosen].write_dfs(config, frames, output, **kwargs)
def write_df(config: Config,
             frame: DF,
             output: OutputOption = None,
             method: Optional[str] = None,
             **kwargs) -> None:
    """Write a memory usage data frame."""
    # Resolve the format (explicit argument wins over configuration) and
    # pass it along so writers can consult it (e.g. json orient).
    chosen = method or config['output.format']
    kwargs['method'] = chosen
    WRITERS[chosen].write_df(config, frame, output, **kwargs)
|