#!/usr/bin/env python3
#
# Copyright (c) 2021 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Generate reports from size artifacts."""

import io
import logging
import re
import sys

from typing import Dict

import fastcore  # type: ignore
import memdf.report
import memdf.sizedb
import memdf.util.config
import memdf.util.github
import memdf.util.markdown
import memdf.util.sqlite
import pandas as pd  # type: ignore

from memdf import Config, ConfigDescription
from memdf.util.github import Gh

DB_CONFIG: ConfigDescription = {
    Config.group_def('database'): {
        'title': 'database options',
    },
    'database.readonly': {
        'help': 'Open database read only',
        'default': False,
        'argparse': {
            'alias': ['--db-readonly'],
        },
    },
}

GITHUB_CONFIG: ConfigDescription = {
    Config.group_def('github'): {
        'title': 'github options',
    },
    'github.comment': {
        'help': 'Send output as github PR comments',
        'default': False,
        'argparse': {
            'alias': ['--comment'],
        },
    },
    'github.limit-comments': {
        'help': 'Send no more than COUNT comments',
        'metavar': 'COUNT',
        'default': 0,
        'argparse': {
            'type': int,
        },
    },
    'github.limit-artifacts': {
        'help': 'Download no more than COUNT artifacts',
        'metavar': 'COUNT',
        'default': 0,
        'argparse': {
            'type': int,
        },
    },
    'github.limit-pr': {
        'help': 'Report only on PR, if present.',
        'metavar': 'PR',
        'default': 0,
        'argparse': {
            'type': int,
        },
    },
}

REPORT_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
    },
    'report.pr': {
        'help': 'Report on pull requests',
        'default': False,
        'argparse': {
            'alias': ['--pr', '--pull-request'],
        },
    },
    'report.push': {
        'help': 'Report on pushes',
        'default': False,
        'argparse': {
            'alias': ['--push']
        },
    },
    'report.increases': {
        'help': 'Highlight large increases',
        'metavar': 'PERCENT',
        'default': 0.0,
        'argparse': {
            'alias': ['--threshold'],
            'type': float,
        },
    },
}
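
# A rough sketch of one possible invocation, using only the options defined in
# this file (the script name and input files are placeholders; GitHub,
# database, and output options come from the memdf.util.* and memdf.report
# CONFIG maps merged in main() and are not shown here):
#
#   ./gh_report.py --pr --comment --threshold 2.0 [FILE ...]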


class SizeContext:
    """Generate reports from size artifacts."""

    comment_format_re = re.compile(r"^<!--ghr-comment-format:(\d+)-->")

    def __init__(self, config: Config):
        self.config = config
        self.gh = Gh(config)
        db_file = config.get('database.file', ':memory:')
        self.db = memdf.sizedb.SizeDatabase(db_file,
                                            not config['database.readonly'])
        self.db.open()
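
    # Note: the artifact records returned by Gh.get_size_artifacts() are used
    # below as if they carry at least `id`, `name`, `group`, `commit`,
    # `parent`, and `event` attributes, where `group` identifies a comparable
    # build configuration, `commit`/`parent` are commit hashes, and `event` is
    # the triggering GitHub Actions event (e.g. 'pull_request'). This is a
    # reading of the code below, not a documented contract of Gh.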

    def add_sizes_from_github(self):
        """Read size report artifacts from github."""
        # Record size artifacts keyed by group and commit_hash to match them
        # up after we have the entire list.
        size_artifacts: Dict[str, Dict[str, fastcore.basics.AttrDict]] = {}
        for a in self.gh.get_size_artifacts():
            if a.group not in size_artifacts:
                size_artifacts[a.group] = {}
            size_artifacts[a.group][a.commit] = a
            logging.debug('ASG: artifact %d %s', a.id, a.name)

        # Determine required size artifacts.
        artifact_limit = self.config['github.limit-artifacts']
        required_artifact_ids: set[int] = set()
        for group, group_reports in size_artifacts.items():
            logging.debug('ASG: group %s', group)
            for report in group_reports.values():
                if self.should_report(report.event):
                    if report.parent not in group_reports:
                        logging.debug('ASN: No match for %s', report.name)
                        continue
                    if (artifact_limit
                            and len(required_artifact_ids) >= artifact_limit):
                        continue
                    # We have size information for both this report and its
                    # parent, so ensure that both artifacts are downloaded.
                    parent = group_reports[report.parent]
                    required_artifact_ids.add(report.id)
                    required_artifact_ids.add(parent.id)
                    logging.debug('ASM: Match %s', report.parent)
                    logging.debug('ASR: %s %s', report.id, report.name)
                    logging.debug('ASP: %s %s', parent.id, parent.name)

        # Download and add required artifacts.
        for i in required_artifact_ids:
            blob = self.gh.download_artifact(i)
            if blob:
                self.db.add_sizes_from_zipfile(io.BytesIO(blob),
                                               {'artifact': i})

    def read_inputs(self):
        """Read size reports from github and/or local files."""
        if self.gh:
            self.add_sizes_from_github()
        for filename in self.config['args.inputs']:
            self.db.add_sizes_from_file(filename)
        self.db.commit()
        return self

    def should_report(self, event: str = '') -> bool:
        """Return true if reporting is enabled for the action event."""
        if not event:
            return self.config['report.pr'] or self.config['report.push']
        if event == 'pull_request':
            return self.config['report.pr']
        return self.config['report.push']

    def get_existing_comment(self, pr: int, title: str):
        """Find an existing comment whose first line matches the given title.

        Returns the comment (or None) and its embedded format version.
        """
        existing_comment = None
        existing_comment_format = 0
        for comment in self.gh.get_comments_for_pr(pr):
            comment_parts = comment.body.partition('\n')
            if comment_parts[0].strip() == title:
                existing_comment = comment
                if m := self.comment_format_re.match(comment_parts[2]):
                    existing_comment_format = int(m.group(1))
                break
        return (existing_comment, existing_comment_format)

    def get_newest_commit(self, pr: int) -> str:
        """Get the hash of the most recent commit on the PR."""
        commits = sorted(
            self.gh.get_commits_for_pr(pr),
            key=lambda c: f'{c.commit.committer.date}{c.commit.author.date}',
            reverse=True)
        return commits[0].sha if commits else ''

    def post_change_report(self, df: pd.DataFrame) -> bool:
        """Send a change report as a github comment."""
        if not self.gh:
            return False

        pr = df.attrs['pr']

        # Check for an existing size report comment. If one exists, we'll add
        # the new information to it.
        existing_comment, existing_comment_format = self.get_existing_comment(
            pr, df.attrs['title'])

        if not existing_comment:
            # Check the most recent commit on the PR, so that we don't comment
            # for commits that are already outdated.
            commit = df.attrs['commit']
            latest = self.get_newest_commit(pr)
            if commit != latest:
                logging.info(
                    'SCS: PR #%s: not commenting for stale %s; newest is %s',
                    pr, commit, latest)
                # Return True so that the obsolete artifacts get removed.
                return True

        if existing_comment_format == 1:
            df = V1Comment.merge(df, existing_comment)
        else:
            existing_comment = None

        text = V1Comment.format(self.config, df)

        if existing_comment:
            return self.gh.update_comment(existing_comment.id, text)
        return self.gh.create_comment(pr, text)

    def report_matching_commits(self) -> Dict[str, pd.DataFrame]:
        """Report on all new comparable commits."""
        if not self.should_report():
            return {}

        comment_count = 0
        comment_limit = self.config['github.limit-comments']
        comment_enabled = (self.config['github.comment']
                           or self.config['github.dryrun-comment'])
        only_pr = self.config['github.limit-pr']

        dfs = {}
        commits = self.db.select_matching_commits()
        for event, pr, commit, parent in commits.fetchall():
            if not self.should_report(event):
                continue

            # Github doesn't have a way to fetch artifacts associated with a
            # particular PR. For testing purposes, filter to a single PR here.
            if only_pr and pr != only_pr:
                continue

            changes = self.db.select_changes(parent, commit)

            self.db.delete_builds(changes.stale_builds)
            self.gh.delete_artifacts(changes.stale_artifacts)

            if not changes.rows:
                # Matching commits had no new matching builds.
                continue

            df = pd.DataFrame(changes.rows, columns=changes.columns)
            df.attrs = {
                'name': f'{pr},{parent},{commit}',
                'title': (f'PR #{pr}: ' if pr else '') +
                         f'Size comparison from {parent} to {commit}',
                'things': changes.things,
                'builds': changes.builds,
                'artifacts': changes.artifacts,
                'pr': pr,
                'commit': commit,
                'parent': parent,
            }
            dfs[df.attrs['name']] = df

            if (event == 'pull_request' and comment_enabled
                    and (comment_limit == 0 or comment_limit > comment_count)):
                if self.post_change_report(df):
                    # Mark the originating builds, and remove the originating
                    # artifacts, so that they don't generate duplicate report
                    # comments.
                    self.db.set_commented(df.attrs['builds'])
                    self.gh.delete_artifacts(df.attrs['artifacts'])
                    comment_count += 1
        return dfs


class V1Comment:
    """Format of a GitHub comment."""

    @staticmethod
    def format(config: Config, df: pd.DataFrame):
        """Format a GitHub comment."""
        threshold_df = None
        increase_df = df[df['change'] > 0]
        if increase_df.empty:
            increase_df = None
        elif threshold := config['report.increases']:
            threshold_df = df[df['% change'] > threshold]
            if threshold_df.empty:
                threshold_df = None
        decrease_df = df[df['change'] < 0]
        if decrease_df.empty:
            decrease_df = None

        with io.StringIO() as md:
            md.write(df.attrs['title'])
            md.write('\n<!--ghr-comment-format:1-->\n\n')

            if threshold_df is not None:
                md.write(f'**Increases above {threshold:.2g}%:**\n\n')
                md.write('<!--ghr-report:threshold-->\n\n')
                V1Comment.write_df(config, threshold_df, md)

            if increase_df is not None:
                summary = V1Comment.summary(increase_df)
                md.write('<details>\n')
                md.write(f'<summary>Increases ({summary})</summary>\n')
                md.write('<!--ghr-report:increases-->\n\n')
                V1Comment.write_df(config, increase_df, md)
                md.write('</details>\n\n')

            if decrease_df is not None:
                summary = V1Comment.summary(decrease_df)
                md.write('<details>\n')
                md.write(f'<summary>Decreases ({summary})</summary>\n')
                md.write('<!--ghr-report:decreases-->\n\n')
                V1Comment.write_df(config, decrease_df, md)
                md.write('</details>\n\n')

            summary = V1Comment.summary(df)
            md.write('<details>\n')
            md.write(f'<summary>Full report ({summary})</summary>\n')
            md.write('<!--ghr-report:full-->\n\n')
            V1Comment.write_df(config, df, md)
            md.write('\n</details>\n')

            return md.getvalue()

    @staticmethod
    def summary(df: pd.DataFrame) -> str:
        """Summarize a report: number of builds and the platforms covered."""
        count = df[['platform', 'target', 'config']].drop_duplicates().shape[0]
        platforms = ', '.join(sorted(set(df['platform'])))
        return f'{count} build{"" if count == 1 else "s"} for {platforms}'

    @staticmethod
    def write_df(config: Config, df: pd.DataFrame,
                 out: memdf.report.OutputOption):
        """Write the DataFrame to `out` as a markdown ('pipe') table."""
        memdf.report.write_df(config,
                              df,
                              out,
                              'pipe',
                              hierify=True,
                              title=False,
                              floatfmt='5.1f')

    @staticmethod
    def merge(df: pd.DataFrame, comment) -> pd.DataFrame:
        """Merge an existing comment into the DataFrame."""
        with io.StringIO(comment.body) as body:
            for line in body:
                if line.startswith('<!--ghr-report:full-->'):
                    body.readline()  # Blank line before table.
                    cols, rows = memdf.util.markdown.read_hierified(body)
                    break
        logging.debug('REC: read %d rows', len(rows))

        attrs = df.attrs
        df = pd.concat(
            [df, pd.DataFrame(data=rows, columns=cols).astype(df.dtypes)],
            ignore_index=True)
        df.attrs = attrs
        return df.sort_values(
            by=['platform', 'target', 'config', 'section']).drop_duplicates()


def main(argv):
    status = 0
    try:
        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.util.github.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **GITHUB_CONFIG,
            **DB_CONFIG,
            **REPORT_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
        config.parse(argv)

        szc = SizeContext(config)
        szc.read_inputs()
        dfs = szc.report_matching_commits()
        memdf.report.write_dfs(config,
                               dfs,
                               hierify=True,
                               title=True,
                               floatfmt='5.1f')
    except Exception as exception:
        raise exception

    return status


if __name__ == '__main__':
    sys.exit(main(sys.argv))