sizedb.py

#
# Copyright (c) 2021 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
  16. """Sqlite3 database of binary sizes over time."""
  17. import collections
  18. import json
  19. import logging
  20. import sqlite3
  21. import zipfile
  22. from pathlib import Path
  23. from typing import IO, Dict, Iterable, List, Optional, Union
  24. import memdf.util.sqlite
  25. ChangeInfo = collections.namedtuple('ChangeInfo', [
  26. 'columns', 'rows', 'things', 'builds', 'stale_builds', 'artifacts',
  27. 'stale_artifacts'
  28. ])


class SizeDatabase(memdf.util.sqlite.Database):
    """A database for recording and comparing size reports."""

    on_open = ["PRAGMA foreign_keys = ON", "PRAGMA encoding = 'UTF-8'"]
    on_writable = [
        """
        -- A ‘thing’ identifies the kind of built object.
        -- Builds of the same thing are comparable.
        CREATE TABLE IF NOT EXISTS thing (
            id        INTEGER PRIMARY KEY,
            platform  TEXT NOT NULL,        -- Build platform
            config    TEXT NOT NULL,        -- Build configuration discriminator
            target    TEXT NOT NULL,        -- Build target
            UNIQUE(platform, config, target)
        )
        """, """
        -- A ‘build’ identifies a built instance of a thing at some point.
        CREATE TABLE IF NOT EXISTS build (
            id         INTEGER PRIMARY KEY,
            thing_id   INTEGER REFERENCES thing(id),
            hash       TEXT NOT NULL,       -- Commit hash
            parent     TEXT NOT NULL,       -- Parent commit hash
            pr         INTEGER DEFAULT 0,   -- Github PR number
            time       INTEGER NOT NULL,    -- Unix-epoch timestamp
            artifact   INTEGER DEFAULT 0,   -- Github artifact ID
            commented  INTEGER DEFAULT 0,   -- 1 if recorded in a GH comment
            ref        TEXT,                -- Target git ref
            event      TEXT,                -- Github build trigger event
            UNIQUE(thing_id, hash, parent, pr, time, artifact)
        )
        """, """
        -- A ‘size’ entry gives the size of a section for a particular build.
        CREATE TABLE IF NOT EXISTS size (
            build_id  INTEGER REFERENCES build(id),
            name      TEXT NOT NULL,        -- Section name
            size      INTEGER NOT NULL,     -- Section size in bytes
            PRIMARY KEY (build_id, name)
        )
        """
    ]

    def __init__(self, filename: str, writable: bool = True):
        super().__init__(filename, writable)

    def add_sizes(self, **kwargs):
        """
        Add a size report to the database.

        The incoming arguments must contain the required non-ID column names
        from the ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a
        sequence of mappings containing 'name' and 'size'.
        """
        td = {k: kwargs[k] for k in ('platform', 'config', 'target')}
        thing = self.store_and_return_id('thing', **td)
        bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')}
        if 'ref' in kwargs:
            bd['ref'] = kwargs['ref']
        cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')}
        build = self.store_and_return_id('build', thing_id=thing, **bd, **cd)
        if build is None:
            logging.error('Failed to store %s %s %s', thing, bd, cd)
        else:
            for d in kwargs['sizes']:
                self.store('size', build_id=build, **d)

    def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict):
        """Add sizes from a JSON size report."""
        r = origin.copy()
        r.update(json.loads(s))
        r['sizes'] = []
        # Add section sizes.
        for i in r['frames'].get('section', []):
            r['sizes'].append({'name': i['section'], 'size': i['size']})
        # Add segment sizes.
        for i in r['frames'].get('wr', []):
            r['sizes'].append({
                'name': ('(read only)', '(read/write)')[int(i['wr'])],
                'size': i['size']
            })
        self.add_sizes(**r)
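
    # Illustrative only (an assumption, not from the original module): the
    # JSON reports consumed above are expected to look roughly like the
    # following, inferred from the keys that add_sizes_from_json() and
    # add_sizes() read; every value is a made-up placeholder.
    #
    #   {
    #     "platform": "efr32", "config": "default", "target": "lighting-app",
    #     "hash": "<commit sha>", "parent": "<parent sha>",
    #     "time": 1625000000, "event": "pull_request", "pr": 1234,
    #     "frames": {
    #       "section": [{"section": ".text", "size": 482304}],
    #       "wr": [{"wr": 0, "size": 553648}, {"wr": 1, "size": 163840}]
    #     }
    #   }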

    def add_sizes_from_zipfile(self, f: Union[IO, Path], origin: Dict):
        """Add size reports from a zip."""
        with zipfile.ZipFile(f, 'r') as zip_file:
            for i in zip_file.namelist():
                if i.endswith('-sizes.json'):
                    origin['member'] = i
                    with zip_file.open(i) as member:
                        self.add_sizes_from_json(member.read(), origin)

    def add_sizes_from_file(self, filename: str):
        """Add size reports from a file."""
        origin = {'file': filename}
        path = Path(filename)
        if path.suffix == '.json':
            logging.info('ASJ: reading JSON %s', path)
            with open(path, encoding='utf-8') as f:
                self.add_sizes_from_json(f.read(), origin)
        elif path.suffix == '.zip':
            logging.info('ASZ: reading ZIP %s', path)
            self.add_sizes_from_zipfile(path, origin)
        else:
            logging.warning('Unknown file type "%s" ignored', filename)

    def select_thing_id(self, platform: str, config: str,
                        target: str) -> Optional[int]:
        cur = self.execute(
            'SELECT id FROM thing WHERE platform=? AND config=? AND target=?',
            (platform, config, target))
        row = cur.fetchone()
        return row[0] if row else None

    def select_sections_for_thing(self, thing: str) -> List[str]:
        cur = self.execute(
            '''
            SELECT DISTINCT name FROM size WHERE build_id = (
                SELECT DISTINCT id FROM build WHERE thing_id == ?)
            ORDER BY name
            ''', (thing,))
        return [row[0] for row in cur.fetchall()]

    def select_matching_commits(self):
        """Find matching builds, where one's commit is the other's parent."""
        return self.execute('''
            SELECT DISTINCT
                c.event AS event,
                c.pr AS pr,
                c.hash AS hash,
                p.hash AS parent
            FROM build c
                INNER JOIN build p ON p.hash = c.parent
            WHERE c.commented = 0
            ORDER BY c.time DESC, c.pr, c.hash, p.hash
            ''')

    def select_changes(self, parent: str, commit: str) -> ChangeInfo:
        """Returns size changes between the given commits."""
        cur = self.execute(
            '''
            SELECT DISTINCT
                t.id AS thing,
                cb.artifact AS artifact,
                pb.id AS parent_build,
                cb.id AS commit_build,
                t.platform, t.config, t.target,
                cs.name AS name,
                ps.size AS parent_size,
                cs.size AS commit_size,
                cb.time AS time
            FROM thing t
                INNER JOIN build cb ON cb.thing_id = t.id
                INNER JOIN build pb ON pb.thing_id = t.id AND pb.hash = cb.parent
                INNER JOIN size cs ON cs.build_id = cb.id
                INNER JOIN size ps ON ps.build_id = pb.id AND cs.name = ps.name
            WHERE cb.hash = ? AND pb.hash = ?
            ORDER BY t.platform, t.config, t.target,
                     cs.name, cb.time DESC, pb.time DESC
            ''', (commit, parent))

        keep = ('platform', 'target', 'config', 'name', 'parent_size',
                'commit_size')
        things: set[int] = set()
        artifacts: set[int] = set()
        builds: set[int] = set()
        stale_builds: set[int] = set()
        stale_artifacts: set[int] = set()
        previous: Optional[sqlite3.Row] = None
        rows = []
        for row in cur.fetchall():
            row = sqlite3.Row(cur, row)
            things.add(row['thing'])
            if (previous is not None and row['thing'] == previous['thing']
                    and row['name'] == previous['name']):
                # This is a duplicate build, older because we sort descending,
                # presumably from a partial workflow re-run.
                if row['parent_build'] != previous['parent_build']:
                    stale_builds.add(row['parent_build'])
                if row['commit_build'] != previous['commit_build']:
                    stale_builds.add(row['commit_build'])
                    stale_artifacts.add(row['artifact'])
            else:
                previous = row
                new = [row[k] for k in keep]
                parent_size = row['parent_size']
                commit_size = row['commit_size']
                new.append(commit_size - parent_size)
                new.append(self.percent_change(parent_size, commit_size))
                rows.append(new)
                artifacts.add(row['artifact'])
                builds.add(row['commit_build'])
        return ChangeInfo(('platform', 'target', 'config', 'section',
                           parent[:8], commit[:8], 'change', '% change'), rows,
                          things, builds, stale_builds, artifacts,
                          stale_artifacts)
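
    # Illustrative only (not part of the original module): each entry of
    # ChangeInfo.rows built above has the shape
    #
    #   [platform, target, config, section_name,
    #    parent_size, commit_size, change, percent_change]
    #
    # so a section that grows from 123456 to 123500 bytes yields a change of
    # 44 bytes and a percent change of 100 * 44 / 123456 ≈ 0.036.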

    def set_commented(self, build_ids: Iterable[int]):
        """Set the commented flag for the given builds."""
        if not build_ids:
            return
        for build_id in build_ids:
            self.execute('UPDATE build SET commented = 1 WHERE id = ?',
                         (build_id, ))
        self.commit()

    def delete_builds(self, build_ids: Iterable[int]):
        """Delete the given builds."""
        if not build_ids:
            return
        for build_id in build_ids:
            self.execute('DELETE FROM size WHERE build_id = ?', (build_id, ))
            self.execute('DELETE FROM build WHERE id = ?', (build_id, ))
        self.commit()

    @staticmethod
    def percent_change(a: int, b: int) -> float:
        if a == 0:
            return 0.0 if b == 0 else float('inf')
        return 100. * (b - a) / a
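

# Illustrative usage sketch (an assumption, not part of the original module):
# how the methods above are typically combined.  Opening and closing the
# underlying sqlite3 connection is handled by the inherited
# memdf.util.sqlite.Database class and is not shown in this file, so those
# calls are omitted here; the file names below are hypothetical.
#
#   db = SizeDatabase('sizes.db', writable=True)
#   db.add_sizes_from_file('lighting-app-sizes.json')  # or a *.zip of reports
#   for event, pr, commit, parent in db.select_matching_commits().fetchall():
#       info = db.select_changes(parent, commit)
#       # ... report info.columns / info.rows somewhere, then:
#       db.set_commented(info.builds)
#       db.delete_builds(info.stale_builds)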