select.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. #
  2. # Copyright (c) 2021 Project CHIP Authors
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. """Data frame selection utilities."""
  17. from typing import Mapping, Optional
  18. import memdf.name
  19. import memdf.util.config
  20. import memdf.util.pretty
  21. import numpy as np # type: ignore
  22. from memdf import DF, Config, ConfigDescription
  23. def split_size(config: Config, key: str) -> None:
  24. """Split a name:size configuration value.
  25. When a program supports a size threshold for selection or summary,
  26. this can be specificed for a particular item with a suffix on the
  27. configuration, e.g. `--section=.text:16K`.
  28. Given a configuration key `col.select` referring to such a list of
  29. arguments, this function strips any sizes from those arguments
  30. and stores them as a name:size dictionary in `col.limit`.
  31. """
  32. src = key.split('.')
  33. dst = src[:-1] + ['limit']
  34. splits = [s.split(':') for s in config.getl(src, [])]
  35. config.putl(src, [x[0] for x in splits])
  36. config.putl(dst, {
  37. x[0]: memdf.util.config.parse_size(x[1])
  38. for x in splits if len(x) > 1
  39. })
  40. def get_limit(config: Config, column: str, name: str) -> int:
  41. return config.getl([column, 'limit', name], config.get('report.limit', 0))
  42. def postprocess_selections(config: Config, key: str, info: Mapping) -> None:
  43. """Resolve select/ignore command options."""
  44. split_size(config, key)
  45. choice, select = key.split('.')
  46. assert select == 'select'
  47. selections = config.get(key)
  48. if not config.getl([choice, 'ignore-all'], False):
  49. if defaults := config.getl([choice, 'default']):
  50. for i in config.getl([choice, 'ignore']):
  51. if i in defaults:
  52. defaults.remove(i)
  53. selections += defaults
  54. config.put(key, frozenset(selections))
  55. def select_and_ignore_config_desc(key: str) -> ConfigDescription:
  56. return {
  57. Config.group_map(key): {
  58. 'group': 'select'
  59. },
  60. f'{key}.select': {
  61. 'help':
  62. f'{key.capitalize()}(s) to process; otherwise all not ignored',
  63. 'metavar': 'NAME',
  64. 'default': [],
  65. 'argparse': {
  66. 'alias': [f'--{key}'],
  67. },
  68. 'postprocess': postprocess_selections
  69. },
  70. f'{key}.select-all': {
  71. 'help': f'Select all {key}s',
  72. 'default': False,
  73. },
  74. key + '.ignore': {
  75. 'help': f'{key.capitalize()}(s) to ignore',
  76. 'metavar': 'NAME',
  77. 'default': [],
  78. },
  79. f'{key}.ignore-all': {
  80. 'help': f'Ignore all {key}s unless explicitly selected',
  81. 'default': False,
  82. },
  83. }
  84. SECTION_CONFIG = select_and_ignore_config_desc('section')
  85. SYMBOL_CONFIG = select_and_ignore_config_desc('symbol')
  86. REGION_CONFIG = select_and_ignore_config_desc('region')
  87. CONFIG: ConfigDescription = {
  88. Config.group_def('select'): {
  89. 'title': 'selection options',
  90. },
  91. **SECTION_CONFIG,
  92. **SYMBOL_CONFIG,
  93. **REGION_CONFIG,
  94. }
  95. COLLECTED_CHOICES = ['symbol', 'section']
  96. SYNTHETIC_CHOICES = ['region']
  97. SELECTION_CHOICES = COLLECTED_CHOICES + SYNTHETIC_CHOICES
  98. def is_selected(config: Config, column, name) -> bool:
  99. """Test `name` against the configured selection criteria for `column`."""
  100. if config.getl([column, 'select-all']):
  101. return True
  102. if name in config.getl([column, 'select'], []):
  103. return True
  104. return False
  105. def synthesize_region(config: Config, df: DF, column: str) -> DF:
  106. """Add a 'region' column derived from the 'section' column."""
  107. cmap = config.transpose_dictlist(config.get('region.sections', {}))
  108. memdf.util.pretty.debug(cmap)
  109. df[column] = df['section'].map(lambda x: cmap.get(x, memdf.name.UNKNOWN))
  110. return df
  111. def groupby_region(df: DF):
  112. return df[(df['size'] > 0) | (df['region'] != memdf.name.UNKNOWN)]
  113. SYNTHESIZE = {
  114. 'region': (synthesize_region, groupby_region),
  115. }
  116. def synthesize_column(config: Config, df: DF, column: str) -> DF:
  117. if column not in df.columns:
  118. SYNTHESIZE[column][0](config, df, column)
  119. return df
  120. def select_configured_column(config: Config, df: DF, column: str) -> DF:
  121. """Apply configured selection options to a column"""
  122. if column in df and not config.getl([column, 'select-all']):
  123. selections = config.getl([column, 'select'], [])
  124. if selections:
  125. df = df.loc[df[column].isin(selections)]
  126. return df
  127. def select_configured(config: Config, df: DF, columns=SELECTION_CHOICES) -> DF:
  128. for column in columns:
  129. df = select_configured_column(config, df, column)
  130. return df
  131. def groupby(config: Config, df: DF, by: Optional[str] = None):
  132. if not by:
  133. by = config['report.by']
  134. df = df[[by, 'size']].groupby(by).aggregate(np.sum).reset_index()
  135. if by in SYNTHESIZE:
  136. df = SYNTHESIZE[by][1](df)
  137. return df