gh_db_query.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. #!/usr/bin/env python3
  2. #
  3. # Copyright (c) 2022 Project CHIP Authors
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. """Common queries on a size database."""
  18. import datetime
  19. import logging
  20. import sys
  21. from typing import cast, Dict, List, Mapping, Optional, Tuple
  22. import pandas as pd # type: ignore
  23. import memdf.report
  24. import memdf.util.config
  25. import memdf.util.sqlite
  26. from memdf.sizedb import SizeDatabase
  27. from memdf import Config
  28. QUERY_CONFIG = {
  29. Config.group_map('query'): {
  30. 'group': 'output'
  31. },
  32. 'report.increases': {
  33. 'help': 'Highlight large increases',
  34. 'metavar': 'PERCENT',
  35. 'default': 0.0,
  36. 'argparse': {
  37. 'alias': ['--threshold'],
  38. 'type': float,
  39. },
  40. },
  41. 'query.where': {
  42. 'help': 'SQL filter',
  43. 'metavar': 'SQL-EXPR',
  44. 'default': '',
  45. 'argparse': {
  46. 'alias': ['--where'],
  47. },
  48. },
  49. 'query.order': {
  50. 'help': 'sort order',
  51. 'metavar': 'COLUMN[,COLUMN]*',
  52. 'default': '',
  53. 'argparse': {
  54. 'alias': ['--order'],
  55. },
  56. },
  57. 'query.limit': {
  58. 'help': 'limit result size',
  59. 'metavar': 'ROWS',
  60. 'default': 0,
  61. 'argparse': {
  62. 'alias': ['--limit'],
  63. },
  64. },
  65. }
  66. def argsplit(metavar: str, value: str) -> Tuple[Optional[Tuple], Dict]:
  67. """Given comma-separated metavar and values, match them up."""
  68. values = tuple(value.split(','))
  69. names = metavar.split(',')
  70. if len(names) < len(values):
  71. logging.error('Too many values for %s', metavar)
  72. return (None, {})
  73. if len(names) > len(values):
  74. logging.error('Missing %s for %s', ','.join(names[len(values):]),
  75. metavar)
  76. return (None, {})
  77. return (values, dict(zip(names, values)))
  78. def postprocess_canned_sql_option(config: Config, key: str,
  79. info: Mapping) -> None:
  80. """Record information from simple SQL query options in one place."""
  81. value = config[key]
  82. if not value:
  83. return
  84. title = info['sql']['title']
  85. if isinstance(value, str):
  86. metavar = info.get('metavar', 'VALUE')
  87. if ',' in metavar:
  88. values, args = argsplit(metavar, value)
  89. if not values:
  90. return
  91. else:
  92. values = (value,)
  93. args = {metavar: value}
  94. title = title.format(**args)
  95. else:
  96. values = tuple()
  97. if config['queries'] is None:
  98. config['queries'] = []
  99. cast(list, config['queries']).append((title, key, values, info))
  100. def make_query(config: Config, info: Mapping) -> str:
  101. """Construct an SQL query string for a simple SQL query option."""
  102. args = {'where': '', 'order': '', 'limit': ''}
  103. if where := config.get('query.where'):
  104. if kw := info['sql'].get('where'):
  105. args['where'] = f'{kw} {where}'
  106. if order := (config.get('query.order') or info['sql'].get('order')):
  107. args['order'] = f'ORDER BY {order}'
  108. if limit := config.get('query.limit'):
  109. args['limit'] = f'LIMIT {limit}'
  110. return info['sql']['query'].format(**args)
  111. def postprocess_df_time(_config: Config, df: pd.DataFrame) -> pd.DataFrame:
  112. """Convert a DataFrame ‘time’ column from Unix timestamp to ISO."""
  113. df['time'] = df['time'].map(lambda t: datetime.datetime.utcfromtimestamp(t)
  114. .isoformat())
  115. return df
  116. def postprocess_df_changes(config: Config, df: pd.DataFrame) -> pd.DataFrame:
  117. """Given ‘parent_size’and ‘commit_size’ columns, add change columns."""
  118. df['change'] = df.apply(lambda row: row.commit_size - row.parent_size,
  119. axis=1)
  120. df['% change'] = df.apply(lambda row: SizeDatabase.percent_change(
  121. row.parent_size, row.commit_size),
  122. axis=1)
  123. if threshold := config['report.increases']:
  124. df = df[df['% change'] > threshold]
  125. return df
  126. QUERY_CONFIG |= {
  127. 'query.platforms': {
  128. 'help': 'List known platforms',
  129. 'default': False,
  130. 'postprocess': postprocess_canned_sql_option,
  131. 'sql': {
  132. 'title': 'Platforms',
  133. 'query': '''
  134. SELECT DISTINCT platform FROM thing {where} {order} {limit}
  135. ''',
  136. 'where': 'WHERE',
  137. 'order': 'platform',
  138. },
  139. 'argparse': {
  140. 'alias': ['--platforms'],
  141. },
  142. },
  143. 'query.platform-targets': {
  144. 'help': 'List known targets for the given platform',
  145. 'metavar': 'PLATFORM',
  146. 'default': '',
  147. 'postprocess': postprocess_canned_sql_option,
  148. 'sql': {
  149. 'title': 'Platform Targets',
  150. 'query': '''
  151. SELECT DISTINCT platform, config, target
  152. FROM thing
  153. WHERE platform=? {where}
  154. {order} {limit}
  155. ''',
  156. 'where': 'AND',
  157. 'order': 'platform, config, target',
  158. },
  159. 'argparse': {
  160. 'alias': ['--platform-targets'],
  161. },
  162. },
  163. 'query.platform-sections': {
  164. 'help': 'List known sections for the given platform',
  165. 'metavar': 'PLATFORM',
  166. 'default': '',
  167. 'postprocess': postprocess_canned_sql_option,
  168. 'sql': {
  169. 'title': 'Platform Sections',
  170. 'query': '''
  171. SELECT DISTINCT platform, s.name AS section
  172. FROM thing t
  173. INNER JOIN build b ON t.id == b.thing_id
  174. INNER JOIN size s ON b.id == s.build_id
  175. WHERE platform=? {where}
  176. {order} {limit}
  177. ''',
  178. 'where': 'AND',
  179. 'order': 'platform, section',
  180. },
  181. 'argparse': {
  182. 'alias': ['--platform-sections'],
  183. },
  184. },
  185. 'query.section-sizes': {
  186. 'help': 'List size data for a given build section',
  187. 'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
  188. 'default': '',
  189. 'postprocess': postprocess_canned_sql_option,
  190. 'sql': {
  191. 'title': 'Sizes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
  192. 'query': '''
  193. SELECT DISTINCT time, hash, pr, size
  194. FROM build b
  195. INNER JOIN size s ON b.id == s.build_id
  196. WHERE b.thing_id == (SELECT id FROM thing
  197. WHERE platform == ?
  198. AND config == ?
  199. AND target == ?)
  200. AND name == ?
  201. {where}
  202. {order} {limit}
  203. ''',
  204. 'where': 'AND',
  205. 'order': 'time',
  206. 'postprocess': [postprocess_df_time],
  207. },
  208. },
  209. 'query.section-changes': {
  210. 'help': 'List size changes for a given build section',
  211. 'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
  212. 'default': '',
  213. 'postprocess': postprocess_canned_sql_option,
  214. 'sql': {
  215. 'title': 'Changes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
  216. 'query': '''
  217. WITH builds (bid, pid, time, pr, hash) AS (
  218. SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash
  219. FROM build b
  220. INNER JOIN build p
  221. ON p.hash = b.parent AND p.thing_id == b.thing_id
  222. WHERE b.thing_id == (SELECT id FROM thing
  223. WHERE platform == ?
  224. AND config == ?
  225. AND target == ?)
  226. )
  227. SELECT DISTINCT
  228. time, hash, pr,
  229. ps.size as parent_size,
  230. bs.size as commit_size
  231. FROM builds
  232. INNER JOIN size bs ON builds.bid == bs.build_id
  233. INNER JOIN size ps ON builds.pid == ps.build_id
  234. WHERE bs.name == ? AND ps.name == bs.name
  235. {where}
  236. {order} {limit}
  237. ''',
  238. 'where': 'AND',
  239. 'order': 'time',
  240. 'postprocess': [postprocess_df_time, postprocess_df_changes],
  241. },
  242. },
  243. 'query.all-changes': {
  244. 'help': 'List all size changes',
  245. 'default': False,
  246. 'postprocess': postprocess_canned_sql_option,
  247. 'sql': {
  248. 'title': 'Size Changes',
  249. 'query': '''
  250. WITH
  251. builds (bid, pid, time, pr, hash, thing_id) AS (
  252. SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash, b.thing_id
  253. FROM build b
  254. INNER JOIN build p
  255. ON p.hash = b.parent AND p.thing_id == b.thing_id
  256. ),
  257. changes (bid, tid, name, parent_size, commit_size, change) AS (
  258. SELECT DISTINCT
  259. bs.build_id,
  260. thing_id,
  261. bs.name,
  262. ps.size as parent_size,
  263. bs.size as commit_size,
  264. bs.size - ps.size as change
  265. FROM builds
  266. INNER JOIN size bs ON builds.bid == bs.build_id
  267. INNER JOIN size ps ON builds.pid == ps.build_id
  268. WHERE bs.name == ps.name
  269. )
  270. SELECT
  271. time, hash,
  272. platform, config, target, name,
  273. parent_size, commit_size, change
  274. FROM changes
  275. INNER JOIN build ON bid == build.id
  276. INNER JOIN thing ON tid == thing.id
  277. {where} {order} {limit}
  278. ''',
  279. 'where': 'AND',
  280. 'order': 'time',
  281. 'postprocess': [postprocess_df_time, postprocess_df_changes],
  282. },
  283. },
  284. 'query.build-sizes': {
  285. # SQLite doesn't have PIVOT so we have to script this.
  286. 'help': 'List size changes for a given build',
  287. 'metavar': 'PLATFORM,CONFIG,TARGET',
  288. 'default': '',
  289. },
  290. }
  291. def get_build_sections(db: SizeDatabase, build: str) -> Optional[Tuple]:
  292. """Split a build arg and get its thing_id and sections."""
  293. values, args = argsplit('PLATFORM,CONFIG,TARGET', build)
  294. if not values:
  295. return None
  296. platform = args['PLATFORM']
  297. pconfig = args['CONFIG']
  298. ptarget = args['TARGET']
  299. thing_id = db.select_thing_id(platform, pconfig, ptarget)
  300. if not thing_id:
  301. logging.error('No match for %s,%s,%s', platform, pconfig, ptarget)
  302. return None
  303. sections = db.select_sections_for_thing(thing_id)
  304. if not sections:
  305. logging.warning('No sections for %s,%s,%s', platform, pconfig, ptarget)
  306. return None
  307. return (platform, pconfig, ptarget, thing_id, sections)
  308. def make_build_sizes_query(config: Config, thing_id: str,
  309. sections: List[str]) -> Tuple[List[str], str]:
  310. """Construct and SQL query for all section sizes for a given thing."""
  311. # SQLite doesn't have PIVOT so we need to construct a query with
  312. # a column for each section.
  313. columns = ['time', 'hash', 'pr']
  314. cols = ', '.join(columns)
  315. joins = ''
  316. where = f' WHERE b.thing_id == {thing_id}'
  317. for i, s in enumerate(sections):
  318. columns.append(s)
  319. cols += f', s{i}.size AS s{i}z'
  320. joins += f' INNER JOIN size s{i} ON b.id == s{i}.build_id'
  321. where += f' AND s{i}.name == "{s}"'
  322. if qw := config['query.where']:
  323. where += f' AND {qw}'
  324. query = f'''SELECT {cols}
  325. FROM build b
  326. {joins}
  327. {where}
  328. ORDER BY {config.get('query.order') or 'time'}'''
  329. if limit := config['query.limit']:
  330. query += f' LIMIT {limit}'
  331. return (columns, query)
  332. def query_build_sizes(config: Config, db: SizeDatabase,
  333. build: str) -> Optional[pd.DataFrame]:
  334. """Get all sizes for the given build."""
  335. t = get_build_sections(db, build)
  336. if not t:
  337. return None
  338. platform, pconfig, ptarget, thing_id, sections = t
  339. columns, query = make_build_sizes_query(config, thing_id, sections)
  340. logging.debug('Query: %s', query)
  341. cur = db.execute(query)
  342. rows = cur.fetchall()
  343. if rows:
  344. df = pd.DataFrame(rows, columns=columns)
  345. df.attrs = {
  346. 'name': f'qbs-{build}',
  347. 'title': f'Sizes for {platform} {pconfig} {ptarget}',
  348. }
  349. return postprocess_df_time(config, df)
  350. return None
  351. def main(argv):
  352. status = 0
  353. try:
  354. cfg = {
  355. **memdf.util.config.CONFIG,
  356. **memdf.util.sqlite.CONFIG,
  357. **memdf.report.OUTPUT_CONFIG,
  358. **QUERY_CONFIG,
  359. }
  360. cfg['database.file']['argparse']['required'] = True
  361. config = Config().init(cfg)
  362. config.parse(argv)
  363. db = SizeDatabase(config['database.file'], writable=False)
  364. db.open()
  365. dfs = {}
  366. q = 0
  367. for title, key, values, info in config.get('queries', []):
  368. q += 1
  369. query = make_query(config, info)
  370. logging.debug('Option: %s', key)
  371. logging.debug('Title: %s', title)
  372. logging.debug('Query: %s', query.strip())
  373. logging.debug('With: %s', values)
  374. cur = db.execute(query, values)
  375. columns = [i[0] for i in cur.description]
  376. rows = cur.fetchall()
  377. if rows:
  378. df = pd.DataFrame(rows, columns=columns)
  379. df.attrs = {'name': f'query{q}', 'title': title}
  380. for f in info['sql'].get('postprocess', []):
  381. df = f(config, df)
  382. dfs[df.attrs['name']] = df
  383. if build := config['query.build-sizes']:
  384. q += 1
  385. if (df := query_build_sizes(config, db, build)) is not None:
  386. dfs[df.attrs['name']] = df
  387. if q == 0:
  388. config.argparse.print_help()
  389. return 1
  390. memdf.report.write_dfs(config,
  391. dfs,
  392. hierify=config['hierify'],
  393. title=True,
  394. floatfmt='5.1f')
  395. except Exception as exception:
  396. raise exception
  397. return status
  398. if __name__ == '__main__':
  399. sys.exit(main(sys.argv))