#!/usr/bin/env python3
#
# Copyright (c) 2021 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Generate reports from size artifacts."""

import io
import logging
import re
import sys

from typing import Dict

import fastcore  # type: ignore
import memdf.report
import memdf.sizedb
import memdf.util.config
import memdf.util.github
import memdf.util.markdown
import memdf.util.sqlite
import pandas as pd  # type: ignore

from memdf import Config, ConfigDescription
from memdf.util.github import Gh

DB_CONFIG: ConfigDescription = {
    Config.group_def('database'): {
        'title': 'database options',
    },
    'database.readonly': {
        'help': 'Open database read only',
        'default': False,
        'argparse': {
            'alias': ['--db-readonly'],
        },
    },
}

GITHUB_CONFIG: ConfigDescription = {
    Config.group_def('github'): {
        'title': 'github options',
    },
    'github.comment': {
        'help': 'Send output as github PR comments',
        'default': False,
        'argparse': {
            'alias': ['--comment'],
        },
    },
    'github.limit-comments': {
        'help': 'Send no more than COUNT comments',
        'metavar': 'COUNT',
        'default': 0,
        'argparse': {
            'type': int,
        },
    },
    'github.limit-artifacts': {
        'help': 'Download no more than COUNT artifacts',
        'metavar': 'COUNT',
        'default': 0,
        'argparse': {
            'type': int,
        },
    },
    'github.limit-pr': {
        'help': 'Report only on PR, if present.',
        'metavar': 'PR',
        'default': 0,
        'argparse': {
            'type': int,
        },
    },
}

REPORT_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
    },
    'report.pr': {
        'help': 'Report on pull requests',
        'default': False,
        'argparse': {
            'alias': ['--pr', '--pull-request'],
        },
    },
    'report.push': {
        'help': 'Report on pushes',
        'default': False,
        'argparse': {
            'alias': ['--push'],
        },
    },
    'report.increases': {
        'help': 'Highlight large increases',
        'metavar': 'PERCENT',
        'default': 0.0,
        'argparse': {
            'alias': ['--threshold'],
            'type': float,
        },
    },
}


class SizeContext:
    """Generate reports from size artifacts."""

    comment_format_re = re.compile(r"^<!--ghr-comment-format:(\d+)-->")

    def __init__(self, config: Config):
        self.config = config
        self.gh = Gh(config)
        db_file = config.get('database.file', ':memory:')
        self.db = memdf.sizedb.SizeDatabase(db_file,
                                            not config['database.readonly'])
        self.db.open()

    def add_sizes_from_github(self):
        """Read size report artifacts from github."""
        # Record size artifacts keyed by group and commit_hash to match them up
        # after we have the entire list.
        size_artifacts: Dict[str, Dict[str, fastcore.basics.AttrDict]] = {}
        for a in self.gh.get_size_artifacts():
            if a.group not in size_artifacts:
                size_artifacts[a.group] = {}
            size_artifacts[a.group][a.commit] = a
            logging.debug('ASG: artifact %d %s', a.id, a.name)

        # Determine required size artifacts.
        artifact_limit = self.config['github.limit-artifacts']
        required_artifact_ids: set[int] = set()
        for group, group_reports in size_artifacts.items():
            logging.debug('ASG: group %s', group)
            for report in group_reports.values():
                if self.should_report(report.event):
                    if report.parent not in group_reports:
                        logging.debug('ASN: No match for %s', report.name)
                        continue
                    if (artifact_limit
                            and len(required_artifact_ids) >= artifact_limit):
                        continue
                    # We have size information for both this report and its
                    # parent, so ensure that both artifacts are downloaded.
                    parent = group_reports[report.parent]
                    required_artifact_ids.add(report.id)
                    required_artifact_ids.add(parent.id)
                    logging.debug('ASM: Match %s', report.parent)
                    logging.debug('ASR: %s %s', report.id, report.name)
                    logging.debug('ASP: %s %s', parent.id, parent.name)

        # Download and add required artifacts.
        for i in required_artifact_ids:
            blob = self.gh.download_artifact(i)
            if blob:
                self.db.add_sizes_from_zipfile(io.BytesIO(blob),
                                               {'artifact': i})

    def read_inputs(self):
        """Read size report from github and/or local files."""
        if self.gh:
            self.add_sizes_from_github()
        for filename in self.config['args.inputs']:
            self.db.add_sizes_from_file(filename)
        self.db.commit()
        return self

    def should_report(self, event: str = '') -> bool:
        """Return true if reporting is enabled for the action event."""
        if not event:
            return self.config['report.pr'] or self.config['report.push']
        if event == 'pull_request':
            return self.config['report.pr']
        return self.config['report.push']

    def get_existing_comment(self, pr: int, title: str):
        """Check for an existing comment."""
        existing_comment = None
        existing_comment_format = 0
        for comment in self.gh.get_comments_for_pr(pr):
            comment_parts = comment.body.partition('\n')
            if comment_parts[0].strip() == title:
                existing_comment = comment
                if m := self.comment_format_re.match(comment_parts[2]):
                    existing_comment_format = int(m.group(1))
                break
        return (existing_comment, existing_comment_format)

    def get_newest_commit(self, pr: int) -> str:
        """Get the hash of the most recent commit on the PR."""
        commits = sorted(
            self.gh.get_commits_for_pr(pr),
            key=lambda c: f'{c.commit.committer.date}{c.commit.author.date}',
            reverse=True)
        return commits[0].sha if commits else ''

    def post_change_report(self, df: pd.DataFrame) -> bool:
        """Send a change report as a github comment."""
        if not self.gh:
            return False
        pr = df.attrs['pr']
        # Check for an existing size report comment. If one exists, we'll add
        # the new information to it.
        existing_comment, existing_comment_format = self.get_existing_comment(
            pr, df.attrs['title'])
        if not existing_comment:
            # Check the most recent commit on the PR, so that we don't comment
            # for commits that are already outdated.
            commit = df.attrs['commit']
            latest = self.get_newest_commit(pr)
            if commit != latest:
                logging.info(
                    'SCS: PR #%s: not commenting for stale %s; newest is %s',
                    pr, commit, latest)
                # Return True so that the obsolete artifacts get removed.
                return True
        if existing_comment_format == 1:
            df = V1Comment.merge(df, existing_comment)
        else:
            existing_comment = None
        text = V1Comment.format(self.config, df)
        if existing_comment:
            return self.gh.update_comment(existing_comment.id, text)
        return self.gh.create_comment(pr, text)

    def report_matching_commits(self) -> Dict[str, pd.DataFrame]:
        """Report on all new comparable commits."""
        if not self.should_report():
            return {}
        comment_count = 0
        comment_limit = self.config['github.limit-comments']
        comment_enabled = (self.config['github.comment']
                           or self.config['github.dryrun-comment'])
        only_pr = self.config['github.limit-pr']
        dfs = {}
        commits = self.db.select_matching_commits()
        for event, pr, commit, parent in commits.fetchall():
            if not self.should_report(event):
                continue
            # Github doesn't have a way to fetch artifacts associated with a
            # particular PR. For testing purposes, filter to a single PR here.
            if only_pr and pr != only_pr:
                continue
            changes = self.db.select_changes(parent, commit)
            self.db.delete_builds(changes.stale_builds)
            self.gh.delete_artifacts(changes.stale_artifacts)
            if not changes.rows:
                # Matching commits had no new matching builds.
                continue
            df = pd.DataFrame(changes.rows, columns=changes.columns)
            df.attrs = {
                'name': f'{pr},{parent},{commit}',
                'title': (f'PR #{pr}: ' if pr else '') +
                f'Size comparison from {parent} to {commit}',
                'things': changes.things,
                'builds': changes.builds,
                'artifacts': changes.artifacts,
                'pr': pr,
                'commit': commit,
                'parent': parent,
            }
            dfs[df.attrs['name']] = df
            if (event == 'pull_request' and comment_enabled
                    and (comment_limit == 0 or comment_limit > comment_count)):
                if self.post_change_report(df):
                    # Mark the originating builds, and remove the originating
                    # artifacts, so that they don't generate duplicate report
                    # comments.
                    self.db.set_commented(df.attrs['builds'])
                    self.gh.delete_artifacts(df.attrs['artifacts'])
                    comment_count += 1
        return dfs
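

# A "V1" report comment consists of the report title on its first line, then a
# `<!--ghr-comment-format:1-->` marker, followed by the optional threshold,
# increases, and decreases sections and the full report table, each preceded
# by a `<!--ghr-report:...-->` marker so that a later run can locate the
# existing table and merge new results into it.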
class V1Comment:
    """Format of a GitHub comment."""

    @staticmethod
    def format(config: Config, df: pd.DataFrame):
        """Format a GitHub comment."""
        threshold_df = None
        increase_df = df[df['change'] > 0]
        if increase_df.empty:
            increase_df = None
        elif threshold := config['report.increases']:
            threshold_df = df[df['% change'] > threshold]
            if threshold_df.empty:
                threshold_df = None
        decrease_df = df[df['change'] < 0]
        if decrease_df.empty:
            decrease_df = None

        with io.StringIO() as md:
            md.write(df.attrs['title'])
            md.write('\n<!--ghr-comment-format:1-->\n\n')

            if threshold_df is not None:
                md.write(f'**Increases above {threshold:.2g}%:**\n\n')
                md.write('<!--ghr-report:threshold-->\n\n')
                V1Comment.write_df(config, threshold_df, md)

            if increase_df is not None:
                summary = V1Comment.summary(increase_df)
                md.write('<details>\n')
                md.write(f'<summary>Increases ({summary})</summary>\n')
                md.write('<!--ghr-report:increases-->\n\n')
                V1Comment.write_df(config, increase_df, md)
                md.write('</details>\n\n')

            if decrease_df is not None:
                summary = V1Comment.summary(decrease_df)
                md.write('<details>\n')
                md.write(f'<summary>Decreases ({summary})</summary>\n')
                md.write('<!--ghr-report:decreases-->\n\n')
                V1Comment.write_df(config, decrease_df, md)
                md.write('</details>\n\n')

            summary = V1Comment.summary(df)
            md.write('<details>\n')
            md.write(f'<summary>Full report ({summary})</summary>\n')
            md.write('<!--ghr-report:full-->\n\n')
            V1Comment.write_df(config, df, md)
            md.write('\n</details>\n')

            return md.getvalue()

    @staticmethod
    def summary(df: pd.DataFrame) -> str:
        count = df[['platform', 'target', 'config']].drop_duplicates().shape[0]
        platforms = ', '.join(sorted(list(set(df['platform']))))
        return f'{count} build{"" if count == 1 else "s"} for {platforms}'

    @staticmethod
    def write_df(config: Config, df: pd.DataFrame,
                 out: memdf.report.OutputOption):
        memdf.report.write_df(config,
                              df,
                              out,
                              'pipe',
                              hierify=True,
                              title=False,
                              floatfmt='5.1f')

    @staticmethod
    def merge(df: pd.DataFrame, comment) -> pd.DataFrame:
        """Merge an existing comment into the DataFrame."""
        # Read the existing full-report table back out of the comment body.
        cols: list = []
        rows: list = []
        with io.StringIO(comment.body) as body:
            for line in body:
                if line.startswith('<!--ghr-report:full-->'):
                    body.readline()  # Blank line before table.
                    cols, rows = memdf.util.markdown.read_hierified(body)
                    break
        logging.debug('REC: read %d rows', len(rows))
        if not rows:
            # No existing report table was found in the comment body;
            # nothing to merge.
            return df
        attrs = df.attrs
        df = pd.concat(
            [df, pd.DataFrame(data=rows, columns=cols).astype(df.dtypes)],
            ignore_index=True)
        df.attrs = attrs
        return df.sort_values(
            by=['platform', 'target', 'config', 'section']).drop_duplicates()


def main(argv):
    status = 0
    try:
        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.util.github.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **GITHUB_CONFIG,
            **DB_CONFIG,
            **REPORT_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
        config.parse(argv)

        szc = SizeContext(config)
        szc.read_inputs()
        dfs = szc.report_matching_commits()
        memdf.report.write_dfs(config,
                               dfs,
                               hierify=True,
                               title=True,
                               floatfmt='5.1f')
    except Exception as exception:
        raise exception

    return status


if __name__ == '__main__':
    sys.exit(main(sys.argv))
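
# Example invocation (a sketch: only flags declared in this file are shown;
# the database, output, and GitHub credential options come from memdf's shared
# CONFIG tables, and their exact spellings are defined there):
#
#   gh_report.py --pr --push --comment --threshold 0.2 [FILE ...]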