
#
# Copyright (c) 2021 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Memory DataFrame output and related utilities."""

import contextlib
import io
import json
import pathlib
import sys
from typing import (IO, Any, Callable, Dict, List, Mapping, Optional,
                    Protocol, Sequence, Union)

import cxxfilt  # type: ignore
import memdf.df
import memdf.select
import memdf.util.pretty
import pandas as pd  # type: ignore

from memdf import DF, Config, ConfigDescription, DFs
from memdf.util.config import ParseSizeAction


REPORT_DEMANGLE_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
    },
    'report.demangle': {
        'help': 'Demangle C++ symbol names',
        'default': False,
        'argparse': {
            'alias': ['--demangle', '-C'],
            'action': 'store_true',
        },
    },
}

REPORT_LIMIT_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
    },
    'report.limit': {
        'help': 'Limit display to items above the given size',
        'metavar': 'BYTES',
        'default': 0,
        'argparse': {
            'alias': ['--limit'],
            'action': ParseSizeAction,
        },
    }
}

REPORT_CONFIG: ConfigDescription = {
    **REPORT_DEMANGLE_CONFIG,
    **REPORT_LIMIT_CONFIG,
}


def postprocess_report_by(config: Config, key: str, info: Mapping) -> None:
    """For --report-by=region, select all sections."""
    assert key == 'report.by'
    if config.get(key) == 'region':
        config.put('section.select-all', True)


REPORT_BY_CONFIG: ConfigDescription = {
    'report.by': {
        'help': 'Reporting group',
        'metavar': 'GROUP',
        'choices': memdf.select.SELECTION_CHOICES,
        'default': 'section',
        'argparse': {
            'alias': ['--by'],
        },
        'postprocess': postprocess_report_by,
    },
}


def demangle(symbol: str) -> str:
    """Demangle a C++ symbol name, returning it unchanged if that fails."""
    try:
        symbol = cxxfilt.demangle(symbol, external_only=False)
    except cxxfilt.InvalidName:
        pass
    return symbol
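
# Illustrative example: a mangled Itanium-ABI name such as '_Z3foov'
# demangles to 'foo()'; names that cxxfilt cannot parse come back unchanged.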


def hierify_rows(table: Sequence[Sequence[Any]]) -> List[List[Any]]:
    """Blank out leading string cells that repeat the previous row's values."""
    if not table:
        return table
    persist = None
    rows = []
    for row in table:
        if persist is None:
            persist = [None] * len(row)
        new_persist = []
        new_row = []
        changed = False
        for old, new in zip(persist, list(row)):
            if not changed and isinstance(new, str) and new == old:
                new_row.append('')
                new_persist.append(old)
            else:
                changed = True
                new_row.append(new)
                new_persist.append(new)
        rows.append(new_row)
        persist = new_persist
    return rows


def hierify(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new data frame with repeated leading values blanked out."""
    columns = list(df.columns)
    rows = hierify_rows(df.itertuples(index=False))
    r = pd.DataFrame(rows, columns=columns)
    r.attrs = df.attrs
    return r
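
# Illustrative example of hierify_rows (column values are hypothetical):
#   [['.text', 'foo', 100],        [['.text', 'foo', 100],
#    ['.text', 'bar', 200],   -->   ['',      'bar', 200],
#    ['.bss',  'baz',  50]]         ['.bss',  'baz',  50]]
# Repeated leading string values are blanked so the table reads hierarchically.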


# Output

OUTPUT_FILE_CONFIG: ConfigDescription = {
    Config.group_def('output'): {
        'title': 'output options',
    },
    'output.file': {
        'help': 'Output file',
        'metavar': 'FILENAME',
        'default': None,
        'argparse': {
            'alias': ['--output', '-O'],
        },
    },
}


def postprocess_output_metadata(config: Config, key: str,
                                info: Mapping) -> None:
    """For --output-metadata=KEY:VALUE list, convert to dictionary."""
    assert key == 'output.metadata'
    metadata = {}
    for s in config.get(key):
        if ':' in s:
            k, v = s.split(':', 1)
        else:
            k, v = s, True
        metadata[k] = v
    config.put(key, metadata)


OutputOption = Union[IO, str, None]


@contextlib.contextmanager
def open_output(config: Config,
                output: OutputOption = None,
                suffix: Optional[str] = None):
    """Yield an output stream: a given IO object, a named file, or stdout."""
    if isinstance(output, io.IOBase):
        yield output
        return
    if isinstance(output, str):
        filename = output
    else:
        filename = config['output.file']
        if (not filename) or (filename == '-'):
            yield sys.stdout
            return
    if suffix:
        filename += suffix
    f = open(filename, 'w')
    yield f
    f.close()
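
# Illustrative usage (file name is hypothetical):
#   with open_output(config, 'sizes.txt') as out:
#       print('total', file=out)
# With output=None and no --output/-O configured, the block writes to stdout.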


# Single-table writers.

def write_nothing(config: Config, df: DF, output: IO, **_kwargs) -> None:
    pass


def write_text(config: Config, df: DF, output: IO, **_kwargs) -> None:
    """Write a memory usage data frame as a human-readable table."""
    memdf.util.pretty.debug(df)
    if df.shape[0]:
        df = df.copy()
        last_column_is_left_justified = False
        formatters = []
        for column in df.columns:
            if pd.api.types.is_string_dtype(df.dtypes[column]):
                df[column] = df[column].astype(str)
                # Left justify strings.
                width = max(len(column), df[column].str.len().max())
                # Bind width now; a bare closure would use the final width
                # for every string column.
                formatters.append(lambda x, width=width: x.ljust(width))
                if column == df.columns[-1]:
                    last_column_is_left_justified = True
            else:
                formatters.append(str)
        s = df.to_string(index=False, formatters=formatters, justify='left')
        if last_column_is_left_justified:
            # Strip trailing spaces.
            for line in s.split('\n'):
                print(line.rstrip(), file=output)
        else:
            print(s, file=output)
    else:
        # No rows. `df.to_string()` doesn't look like a text table in this case.
        print(' '.join(df.columns), file=output)


def write_json(_config: Config, df: DF, output: IO, **kwargs) -> None:
    """Write a memory usage data frame as json."""
    orient = kwargs.get('method', 'records')
    # .removeprefix('json_') in 3.9
    if orient.startswith('json_'):
        orient = orient[5:]
    df.to_json(output, orient=orient)


def write_csv(_config: Config, df: DF, output: IO, **kwargs) -> None:
    """Write a memory usage data frame in csv or tsv form."""
    keywords = ('sep', 'na_rep', 'float_format', 'columns', 'header', 'index',
                'index_label', 'quoting', 'quotechar', 'line_terminator',
                'date_format', 'doublequote', 'escapechar', 'decimal')
    args = {k: kwargs[k] for k in keywords if k in kwargs}
    df.to_csv(output, **args)


def write_markdown(_config: Config, df: DF, output: IO, **kwargs) -> None:
    """Write a memory usage data frame as markdown."""
    keywords = ('index', 'headers', 'showindex', 'tablefmt', 'numalign',
                'stralign', 'disable_numparse', 'colalign', 'floatfmt')
    args = {k: kwargs[k] for k in keywords if k in kwargs}
    if 'tablefmt' not in args:
        args['tablefmt'] = kwargs.get('method', 'pipe')
    df.to_markdown(output, **args)
    print(file=output)
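
# The 'method' name (e.g. 'github', 'grid', 'psql') doubles as the tabulate
# `tablefmt`, since pandas' to_markdown() forwards extra keywords to tabulate.
# Illustrative call: write_markdown(config, df, sys.stdout, method='github')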


# Multi-table writers.

class DFsWriter(Protocol):
    """Type checking for multiple table writers."""

    def __call__(self, config: Config, dfs: DFs, output: OutputOption,
                 writer: Callable, **kwargs) -> None:
        pass


dfname_count = 0


def dfname(df: DF, k: str = 'unknown') -> str:
    """Get a name for a data frame."""
    try:
        return df.name
    except AttributeError:
        if c := memdf.df.find_class(df):
            return c.name
        global dfname_count
        dfname_count += 1
        return k + str(dfname_count)


def write_one(config: Config, frames: DFs, output: OutputOption,
              writer: Callable, **kw) -> None:
    """Write a group of memory usage data frames to a single file."""
    with open_output(config, output) as out:
        sep = ''
        for df in frames.values():
            print(end=sep, file=out)
            if kw.get('title') and 'titlefmt' in kw and 'title' in df.attrs:
                print(kw['titlefmt'].format(df.attrs['title']), file=out)
            sep = '\n'
            writer(config, df, out, **kw)


def write_many(config: Config, frames: DFs, output: OutputOption,
               writer: Callable, **kwargs) -> None:
    """Write a group of memory usage data frames to multiple files."""
    if (suffix := kwargs.get('suffix')) is None:
        if isinstance(output, str) and (suffix := pathlib.Path(output).suffix):
            pass
        elif 'method' in kwargs:
            suffix = '.' + kwargs['method']
        else:
            suffix = ''
    for df in frames.values():
        name = dfname(df)
        with open_output(config, output, f'-{name}{suffix}') as out:
            writer(config, df, out, **kwargs)
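
# write_many sends each frame to its own file: '-<frame name><suffix>' is
# appended to the base output name, with the suffix taken from the output
# name's extension, the method, or an explicit 'suffix' keyword.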


def write_jsons(config: Config, frames: DFs, output: OutputOption,
                writer: Callable, **kwargs) -> None:
    """Write a group of memory usage data frames as a json dictionary."""
    with open_output(config, output) as out:
        print('{', file=out)
        if metadata := config['output.metadata']:
            for k, v in metadata.items():
                print(f' {json.dumps(k)}: {json.dumps(v)},', file=out)
        print(' "frames": ', file=out, end='')
        sep = '{'
        for df in frames.values():
            name = df.attrs.get('name', df.attrs.get('title', dfname(df)))
            print(sep, file=out)
            sep = ','
            print(f' {json.dumps(name)}: ', file=out, end='')
            writer(config, df, out, indent=6, **kwargs)
        print('}}', file=out)
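
# The resulting stream has this shape (keys and frame names are illustrative):
#   { "build": "nightly", "frames": { "section": <table json>, ... }}
# Top-level entries come from --output-metadata; each frame is emitted by the
# single-table json writer.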


def write_none(_config: Config, _frames: DFs, _output: OutputOption,
               _writer: Callable, **_kwargs) -> None:
    pass


def kwgetset(k: str, *args):
    """Union the sets stored under `k` in each of the given mappings."""
    r = set()
    for i in args:
        r |= set(i.get(k, set()))
    return r


def prep(config: Config, df: pd.DataFrame, kw: Dict) -> pd.DataFrame:
    """Preprocess a table for output."""

    def each_column(k: str):
        for column in set(df.attrs.get(k, set()) | kw.get(k, set())):
            if column in df.columns:
                yield column

    def maybe_copy(copied, df):
        return (True, df if copied else df.copy())

    copied = False
    if config['report.demangle']:
        for column in each_column('demangle'):
            copied, df = maybe_copy(copied, df)
            df[column] = df[column].apply(demangle)
    for column in each_column('hexify'):
        copied, df = maybe_copy(copied, df)
        width = (int(df[column].max()).bit_length() + 3) // 4
        df[column] = df[column].apply(
            lambda x: '{0:0{width}X}'.format(x, width=width))
    if kw.get('hierify'):
        df = hierify(df)
    return df
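
# Worked example for the hexify width: a column whose maximum value is 0x1234
# has bit_length() == 13, so width == (13 + 3) // 4 == 4, and every value in
# the column is rendered as a 4-digit zero-padded hex string (e.g. '0042').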


class Writer:
    """Pairing of a multi-table writer and a single-table writer with arguments."""

    def __init__(self,
                 group: Callable,
                 single: Callable,
                 defaults: Optional[Dict] = None,
                 overrides: Optional[Dict] = None):
        self.group = group
        self.single = single
        self.defaults = defaults or {}
        self.overrides = overrides or {}

    def write_df(self,
                 config: Config,
                 frame: pd.DataFrame,
                 output: OutputOption = None,
                 **kwargs) -> None:
        """Write a single memory usage data frame."""
        args = self._args(kwargs)
        with open_output(config, output) as out:
            self.single(config, prep(config, frame, args), out, **args)

    def write_dfs(self,
                  config: Config,
                  frames: DFs,
                  output: OutputOption = None,
                  **kwargs) -> None:
        """Write a group of memory usage data frames."""
        args = self._args(kwargs)
        frames = {k: prep(config, df, args) for k, df in frames.items()}
        self.group(config, frames, output, self.single, **args)

    def _args(self, kw: Mapping) -> Dict:
        r = self.defaults.copy()
        r.update(kw)
        r.update(self.overrides)
        return r
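
# Argument precedence in _args(): defaults < caller kwargs < overrides.
# For instance, CsvWriter({'sep': ','}) lets a caller pass sep='\t', but its
# overrides['hierify'] = False always wins.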


class MarkdownWriter(Writer):
    def __init__(self,
                 defaults: Optional[Dict] = None,
                 overrides: Optional[Dict] = None):
        d = {'index': False}
        d.update(defaults or {})
        super().__init__(write_one, write_markdown, d, overrides)


class JsonWriter(Writer):
    def __init__(self,
                 defaults: Optional[Dict] = None,
                 overrides: Optional[Dict] = None):
        super().__init__(write_jsons, write_json, defaults, overrides)
        self.overrides['hierify'] = False


class CsvWriter(Writer):
    def __init__(self,
                 defaults: Optional[Dict] = None,
                 overrides: Optional[Dict] = None):
        d = {'index': False}
        d.update(defaults or {})
        super().__init__(write_many, write_csv, d, overrides)
        self.overrides['hierify'] = False


WRITERS: Dict[str, Writer] = {
    'none': Writer(write_none, write_nothing),
    'text': Writer(write_one, write_text, {'titlefmt': '\n{}\n'}),
    'json_split': JsonWriter(),
    'json_records': JsonWriter(),
    'json_index': JsonWriter(),
    'json_columns': JsonWriter(),
    'json_values': JsonWriter(),
    'json_table': JsonWriter(),
    'csv': CsvWriter({'sep': ','}),
    'tsv': CsvWriter({'sep': '\t'}),
    'plain': MarkdownWriter({'titlefmt': '\n{}\n'}),
    'simple': MarkdownWriter({'titlefmt': '\n{}\n'}),
    'grid': MarkdownWriter({'titlefmt': '\n\n'}),
    'fancy_grid': MarkdownWriter({'titlefmt': '\n\n'}),
    'html': MarkdownWriter({'titlefmt': '<h2></h2>'}),
    'unsafehtml': MarkdownWriter({'titlefmt': '<h2></h2>'}),
    'github': MarkdownWriter(),
    'pipe': MarkdownWriter(),
    'orgtbl': MarkdownWriter(),
    'jira': MarkdownWriter(),
    'presto': MarkdownWriter(),
    'pretty': MarkdownWriter(),
    'psql': MarkdownWriter(),
    'rst': MarkdownWriter(),
    'mediawiki': MarkdownWriter(),
    'moinmoin': MarkdownWriter(),
    'youtrack': MarkdownWriter(),
    'latex': MarkdownWriter(),
    'latex_raw': MarkdownWriter(),
    'latex_booktabs': MarkdownWriter(),
    'latex_longtable': MarkdownWriter(),
    'textile': MarkdownWriter(),
}
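
# Most of these keys are tabulate table formats (write_markdown passes the
# method through as tablefmt); the 'json_*' keys map to pandas to_json orient
# values, and 'csv'/'tsv' differ only in their separator.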


OUTPUT_FORMAT_CONFIG: ConfigDescription = {
    Config.group_def('output'): {
        'title': 'output options',
    },
    'output.format': {
        'help': f'Output format: one of {", ".join(WRITERS)}.',
        'metavar': 'FORMAT',
        'default': 'simple',
        'choices': list(WRITERS.keys()),
        'argparse': {
            'alias': ['--to', '-t'],
        },
    },
    'output.metadata': {
        'help': 'Metadata for JSON',
        'metavar': 'NAME:VALUE',
        'default': [],
        'argparse': {
            'alias': ['--metadata']
        },
        'postprocess': postprocess_output_metadata,
    }
}

OUTPUT_CONFIG: ConfigDescription = {
    **OUTPUT_FILE_CONFIG,
    **OUTPUT_FORMAT_CONFIG,
}


def write_dfs(config: Config,
              frames: DFs,
              output: OutputOption = None,
              method: Optional[str] = None,
              **kwargs) -> None:
    """Write a group of memory usage data frames."""
    kwargs['method'] = method or config['output.format']
    WRITERS[kwargs['method']].write_dfs(config, frames, output, **kwargs)


def write_df(config: Config,
             frame: DF,
             output: OutputOption = None,
             method: Optional[str] = None,
             **kwargs) -> None:
    """Write a memory usage data frame."""
    kwargs['method'] = method or config['output.format']
    WRITERS[kwargs['method']].write_df(config, frame, output, **kwargs)
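
# Illustrative sketch (names are hypothetical): given a populated Config and a
# DFs mapping such as {'symbol': symbol_df}, a typical call is
#   write_dfs(config, {'symbol': symbol_df}, method='csv')
# which selects WRITERS['csv'] and writes one file per frame via write_many.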