gh_db_query.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. #!/usr/bin/env python3
  2. #
  3. # Copyright (c) 2022 Project CHIP Authors
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. """Common queries on a size database."""
  18. import datetime
  19. import logging
  20. import sys
  21. from typing import Dict, List, Mapping, Optional, Tuple, cast
  22. import memdf.report
  23. import memdf.util.config
  24. import memdf.util.sqlite
  25. import pandas as pd # type: ignore
  26. from memdf import Config
  27. from memdf.sizedb import SizeDatabase
  28. QUERY_CONFIG = {
  29. Config.group_map('query'): {
  30. 'group': 'output'
  31. },
  32. 'report.increases': {
  33. 'help': 'Highlight large increases',
  34. 'metavar': 'PERCENT',
  35. 'default': 0.0,
  36. 'argparse': {
  37. 'alias': ['--threshold'],
  38. 'type': float,
  39. },
  40. },
  41. 'query.where': {
  42. 'help': 'SQL filter',
  43. 'metavar': 'SQL-EXPR',
  44. 'default': '',
  45. 'argparse': {
  46. 'alias': ['--where'],
  47. },
  48. },
  49. 'query.order': {
  50. 'help': 'sort order',
  51. 'metavar': 'COLUMN[,COLUMN]*',
  52. 'default': '',
  53. 'argparse': {
  54. 'alias': ['--order'],
  55. },
  56. },
  57. 'query.limit': {
  58. 'help': 'limit result size',
  59. 'metavar': 'ROWS',
  60. 'default': 0,
  61. 'argparse': {
  62. 'alias': ['--limit'],
  63. },
  64. },
  65. }
  66. def argsplit(metavar: str, value: str) -> Tuple[Optional[Tuple], Dict]:
  67. """Given comma-separated metavar and values, match them up."""
  68. values = tuple(value.split(','))
  69. names = metavar.split(',')
  70. if len(names) < len(values):
  71. logging.error('Too many values for %s', metavar)
  72. return (None, {})
  73. if len(names) > len(values):
  74. logging.error('Missing %s for %s', ','.join(names[len(values):]),
  75. metavar)
  76. return (None, {})
  77. return (values, dict(zip(names, values)))
  78. def postprocess_canned_sql_option(config: Config, key: str,
  79. info: Mapping) -> None:
  80. """Record information from simple SQL query options in one place."""
  81. value = config[key]
  82. if not value:
  83. return
  84. title = info['sql']['title']
  85. if isinstance(value, str):
  86. metavar = info.get('metavar', 'VALUE')
  87. if ',' in metavar:
  88. values, args = argsplit(metavar, value)
  89. if not values:
  90. return
  91. else:
  92. values = (value,)
  93. args = {metavar: value}
  94. title = title.format(**args)
  95. else:
  96. values = tuple()
  97. if config['queries'] is None:
  98. config['queries'] = []
  99. cast(list, config['queries']).append((title, key, values, info))
  100. def make_query(config: Config, info: Mapping) -> str:
  101. """Construct an SQL query string for a simple SQL query option."""
  102. args = {'where': '', 'order': '', 'limit': ''}
  103. if where := config.get('query.where'):
  104. if kw := info['sql'].get('where'):
  105. args['where'] = f'{kw} {where}'
  106. if order := (config.get('query.order') or info['sql'].get('order')):
  107. args['order'] = f'ORDER BY {order}'
  108. if limit := config.get('query.limit'):
  109. args['limit'] = f'LIMIT {limit}'
  110. return info['sql']['query'].format(**args)
  111. def postprocess_df_time(_config: Config, df: pd.DataFrame) -> pd.DataFrame:
  112. """Convert a DataFrame ‘time’ column from Unix timestamp to ISO."""
  113. df['time'] = df['time'].map(lambda t: datetime.datetime.utcfromtimestamp(t)
  114. .isoformat())
  115. return df
  116. def postprocess_df_changes(config: Config, df: pd.DataFrame) -> pd.DataFrame:
  117. """Given ‘parent_size’and ‘commit_size’ columns, add change columns."""
  118. df['change'] = df.apply(lambda row: row.commit_size - row.parent_size,
  119. axis=1)
  120. df['% change'] = df.apply(lambda row: SizeDatabase.percent_change(
  121. row.parent_size, row.commit_size),
  122. axis=1)
  123. if threshold := config['report.increases']:
  124. df = df[df['% change'] > threshold]
  125. return df
  126. QUERY_CONFIG |= {
  127. 'query.platforms': {
  128. 'help': 'List known platforms',
  129. 'default': False,
  130. 'postprocess': postprocess_canned_sql_option,
  131. 'sql': {
  132. 'title': 'Platforms',
  133. 'query': '''
  134. SELECT DISTINCT platform FROM thing {where} {order} {limit}
  135. ''',
  136. 'where': 'WHERE',
  137. 'order': 'platform',
  138. },
  139. 'argparse': {
  140. 'alias': ['--platforms'],
  141. },
  142. },
  143. 'query.platform-targets': {
  144. 'help': 'List known targets for the given platform',
  145. 'metavar': 'PLATFORM',
  146. 'default': '',
  147. 'postprocess': postprocess_canned_sql_option,
  148. 'sql': {
  149. 'title': 'Platform Targets',
  150. 'query': '''
  151. SELECT DISTINCT platform, config, target
  152. FROM thing
  153. WHERE platform=? {where}
  154. {order} {limit}
  155. ''',
  156. 'where': 'AND',
  157. 'order': 'platform, config, target',
  158. },
  159. 'argparse': {
  160. 'alias': ['--platform-targets'],
  161. },
  162. },
  163. 'query.platform-sections': {
  164. 'help': 'List known sections for the given platform',
  165. 'metavar': 'PLATFORM',
  166. 'default': '',
  167. 'postprocess': postprocess_canned_sql_option,
  168. 'sql': {
  169. 'title': 'Platform Sections',
  170. 'query': '''
  171. SELECT DISTINCT platform, s.name AS section
  172. FROM thing t
  173. INNER JOIN build b ON t.id == b.thing_id
  174. INNER JOIN size s ON b.id == s.build_id
  175. WHERE platform=? {where}
  176. {order} {limit}
  177. ''',
  178. 'where': 'AND',
  179. 'order': 'platform, section',
  180. },
  181. 'argparse': {
  182. 'alias': ['--platform-sections'],
  183. },
  184. },
  185. 'query.section-sizes': {
  186. 'help': 'List size data for a given build section',
  187. 'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
  188. 'default': '',
  189. 'postprocess': postprocess_canned_sql_option,
  190. 'sql': {
  191. 'title': 'Sizes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
  192. 'query': '''
  193. SELECT DISTINCT time, hash, pr, size
  194. FROM build b
  195. INNER JOIN size s ON b.id == s.build_id
  196. WHERE b.thing_id == (SELECT id FROM thing
  197. WHERE platform == ?
  198. AND config == ?
  199. AND target == ?)
  200. AND name == ?
  201. {where}
  202. {order} {limit}
  203. ''',
  204. 'where': 'AND',
  205. 'order': 'time',
  206. 'postprocess': [postprocess_df_time],
  207. },
  208. },
  209. 'query.section-changes': {
  210. 'help': 'List size changes for a given build section',
  211. 'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
  212. 'default': '',
  213. 'postprocess': postprocess_canned_sql_option,
  214. 'sql': {
  215. 'title': 'Changes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
  216. 'query': '''
  217. WITH builds (bid, pid, time, pr, hash) AS (
  218. SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash
  219. FROM build b
  220. INNER JOIN build p
  221. ON p.hash = b.parent AND p.thing_id == b.thing_id
  222. WHERE b.thing_id == (SELECT id FROM thing
  223. WHERE platform == ?
  224. AND config == ?
  225. AND target == ?)
  226. )
  227. SELECT DISTINCT
  228. time, hash, pr,
  229. ps.size as parent_size,
  230. bs.size as commit_size
  231. FROM builds
  232. INNER JOIN size bs ON builds.bid == bs.build_id
  233. INNER JOIN size ps ON builds.pid == ps.build_id
  234. WHERE bs.name == ? AND ps.name == bs.name
  235. {where}
  236. {order} {limit}
  237. ''',
  238. 'where': 'AND',
  239. 'order': 'time',
  240. 'postprocess': [postprocess_df_time, postprocess_df_changes],
  241. },
  242. },
  243. 'query.all-changes': {
  244. 'help': 'List all size changes',
  245. 'default': False,
  246. 'postprocess': postprocess_canned_sql_option,
  247. 'sql': {
  248. 'title': 'Size Changes',
  249. 'query': '''
  250. WITH
  251. builds (bid, pid, time, pr, hash, thing_id) AS (
  252. SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash, b.thing_id
  253. FROM build b
  254. INNER JOIN build p
  255. ON p.hash = b.parent AND p.thing_id == b.thing_id
  256. ),
  257. changes (bid, tid, name, parent_size, commit_size, change) AS (
  258. SELECT DISTINCT
  259. bs.build_id,
  260. thing_id,
  261. bs.name,
  262. ps.size as parent_size,
  263. bs.size as commit_size,
  264. bs.size - ps.size as change
  265. FROM builds
  266. INNER JOIN size bs ON builds.bid == bs.build_id
  267. INNER JOIN size ps ON builds.pid == ps.build_id
  268. WHERE bs.name == ps.name
  269. )
  270. SELECT
  271. time, hash,
  272. platform, config, target, name,
  273. parent_size, commit_size, change
  274. FROM changes
  275. INNER JOIN build ON bid == build.id
  276. INNER JOIN thing ON tid == thing.id
  277. {where} {order} {limit}
  278. ''',
  279. 'where': 'AND',
  280. 'order': 'time',
  281. 'postprocess': [postprocess_df_time, postprocess_df_changes],
  282. },
  283. },
  284. 'query.build-sizes': {
  285. # SQLite doesn't have PIVOT so we have to script this.
  286. 'help': 'List size changes for a given build',
  287. 'metavar': 'PLATFORM,CONFIG,TARGET',
  288. 'default': '',
  289. },
  290. }
  291. def get_build_sections(db: SizeDatabase, build: str) -> Optional[Tuple]:
  292. """Split a build arg and get its thing_id and sections."""
  293. values, args = argsplit('PLATFORM,CONFIG,TARGET', build)
  294. if not values:
  295. return None
  296. platform = args['PLATFORM']
  297. pconfig = args['CONFIG']
  298. ptarget = args['TARGET']
  299. thing_id = db.select_thing_id(platform, pconfig, ptarget)
  300. if not thing_id:
  301. logging.error('No match for %s,%s,%s', platform, pconfig, ptarget)
  302. return None
  303. sections = db.select_sections_for_thing(thing_id)
  304. if not sections:
  305. logging.warning('No sections for %s,%s,%s', platform, pconfig, ptarget)
  306. return None
  307. return (platform, pconfig, ptarget, thing_id, sections)
  308. def make_build_sizes_query(config: Config, thing_id: str,
  309. sections: List[str]) -> Tuple[List[str], str]:
  310. """Construct and SQL query for all section sizes for a given thing."""
  311. # SQLite doesn't have PIVOT so we need to construct a query with
  312. # a column for each section.
  313. columns = ['time', 'hash', 'pr']
  314. cols = ', '.join(columns)
  315. joins = ''
  316. where = f' WHERE b.thing_id == {thing_id}'
  317. for i, s in enumerate(sections):
  318. columns.append(s)
  319. cols += f', s{i}.size AS s{i}z'
  320. joins += f' INNER JOIN size s{i} ON b.id == s{i}.build_id'
  321. where += f' AND s{i}.name == "{s}"'
  322. if qw := config['query.where']:
  323. where += f' AND {qw}'
  324. query = f'''SELECT {cols}
  325. FROM build b
  326. {joins}
  327. {where}
  328. ORDER BY {config.get('query.order') or 'time'}'''
  329. if limit := config['query.limit']:
  330. query += f' LIMIT {limit}'
  331. return (columns, query)
  332. def query_build_sizes(config: Config, db: SizeDatabase,
  333. build: str) -> Optional[pd.DataFrame]:
  334. """Get all sizes for the given build."""
  335. t = get_build_sections(db, build)
  336. if not t:
  337. return None
  338. platform, pconfig, ptarget, thing_id, sections = t
  339. columns, query = make_build_sizes_query(config, thing_id, sections)
  340. logging.debug('Query: %s', query)
  341. cur = db.execute(query)
  342. rows = cur.fetchall()
  343. if rows:
  344. df = pd.DataFrame(rows, columns=columns)
  345. df.attrs = {
  346. 'name': f'qbs-{build}',
  347. 'title': f'Sizes for {platform} {pconfig} {ptarget}',
  348. }
  349. return postprocess_df_time(config, df)
  350. return None
  351. def main(argv):
  352. status = 0
  353. try:
  354. cfg = {
  355. **memdf.util.config.CONFIG,
  356. **memdf.util.sqlite.CONFIG,
  357. **memdf.report.OUTPUT_CONFIG,
  358. **QUERY_CONFIG,
  359. }
  360. cfg['database.file']['argparse']['required'] = True
  361. config = Config().init(cfg)
  362. config.parse(argv)
  363. db = SizeDatabase(config['database.file'], writable=False)
  364. db.open()
  365. dfs = {}
  366. q = 0
  367. for title, key, values, info in config.get('queries', []):
  368. q += 1
  369. query = make_query(config, info)
  370. logging.debug('Option: %s', key)
  371. logging.debug('Title: %s', title)
  372. logging.debug('Query: %s', query.strip())
  373. logging.debug('With: %s', values)
  374. cur = db.execute(query, values)
  375. columns = [i[0] for i in cur.description]
  376. rows = cur.fetchall()
  377. if rows:
  378. df = pd.DataFrame(rows, columns=columns)
  379. df.attrs = {'name': f'query{q}', 'title': title}
  380. for f in info['sql'].get('postprocess', []):
  381. df = f(config, df)
  382. dfs[df.attrs['name']] = df
  383. if build := config['query.build-sizes']:
  384. q += 1
  385. if (df := query_build_sizes(config, db, build)) is not None:
  386. dfs[df.attrs['name']] = df
  387. if q == 0:
  388. config.argparse.print_help()
  389. return 1
  390. memdf.report.write_dfs(config,
  391. dfs,
  392. hierify=config['hierify'],
  393. title=True,
  394. floatfmt='5.1f')
  395. except Exception as exception:
  396. raise exception
  397. return status
  398. if __name__ == '__main__':
  399. sys.exit(main(sys.argv))