#!/usr/bin/env python3
#
# Copyright (c) 2021 Project CHIP Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Generate reports from size artifacts."""

import io
import logging
import re
import sys
from typing import Dict

import fastcore  # type: ignore
import memdf.report
import memdf.sizedb
import memdf.util.config
import memdf.util.github
import memdf.util.markdown
import memdf.util.sqlite
import pandas as pd  # type: ignore
from memdf import Config, ConfigDescription
from memdf.util.github import Gh

DB_CONFIG: ConfigDescription = {
    Config.group_def('database'): {
        'title': 'database options',
    },
    'database.readonly': {
        'help': 'Open database read only',
        'default': False,
        'argparse': {
            'alias': ['--db-readonly'],
        },
    },
}

GITHUB_CONFIG: ConfigDescription = {
    Config.group_def('github'): {
        'title': 'github options',
    },
    'github.comment': {
        'help': 'Send output as github PR comments',
        'default': False,
        'argparse': {
            'alias': ['--comment'],
        },
    },
    'github.limit-comments': {
        'help': 'Send no more than COUNT comments',
        'metavar': 'COUNT',
        'default': 0,
        'argparse': {
            'type': int,
        },
    },
    'github.limit-artifacts': {
        'help': 'Download no more than COUNT artifacts',
        'metavar': 'COUNT',
        'default': 0,
        'argparse': {
            'type': int,
        },
    },
    'github.limit-pr': {
        'help': 'Report only on PR, if present.',
        'metavar': 'PR',
        'default': 0,
        'argparse': {
            'type': int,
        },
    },
}

REPORT_CONFIG: ConfigDescription = {
    Config.group_map('report'): {
        'group': 'output'
    },
    'report.pr': {
        'help': 'Report on pull requests',
        'default': False,
        'argparse': {
            'alias': ['--pr', '--pull-request'],
        },
    },
    'report.push': {
        'help': 'Report on pushes',
        'default': False,
        'argparse': {
            'alias': ['--push']
        },
    },
    'report.increases': {
        'help': 'Highlight large increases',
        'metavar': 'PERCENT',
        'default': 0.0,
        'argparse': {
            'alias': ['--threshold'],
            'type': float,
        },
    },
}


class SizeContext:
    """Generate reports from size artifacts."""

    # A report comment embeds a format-version marker (e.g. "<!--fmt:1-->")
    # on the first line after its title; see V1Comment.format().
    comment_format_re = re.compile(r"^<!--fmt:(\d+)-->")

    def __init__(self, config: Config):
        self.config = config
        self.gh = Gh(config)
        db_file = config.get('database.file', ':memory:')
        self.db = memdf.sizedb.SizeDatabase(db_file,
                                            not config['database.readonly'])
        self.db.open()

    def add_sizes_from_github(self):
        """Read size report artifacts from github."""
        # Record size artifacts keyed by group and commit_hash to match them
        # up after we have the entire list.
        size_artifacts: Dict[str, Dict[str, fastcore.basics.AttrDict]] = {}
        for a in self.gh.get_size_artifacts():
            if a.group not in size_artifacts:
                size_artifacts[a.group] = {}
            size_artifacts[a.group][a.commit] = a
            logging.debug('ASG: artifact %d %s', a.id, a.name)

        # Determine required size artifacts.
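        # An artifact is required only when both a commit's report and its
        # parent commit's report are present in the same group; without the
        # parent there is nothing to compare against, so it is skipped.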
        artifact_limit = self.config['github.limit-artifacts']
        required_artifact_ids: set[int] = set()
        for group, group_reports in size_artifacts.items():
            logging.debug('ASG: group %s', group)
            for report in group_reports.values():
                if self.should_report(report.event):
                    if report.parent not in group_reports:
                        logging.debug('ASN: No match for %s', report.name)
                        continue
                    if (artifact_limit
                            and len(required_artifact_ids) >= artifact_limit):
                        continue
                    # We have size information for both this report and its
                    # parent, so ensure that both artifacts are downloaded.
                    parent = group_reports[report.parent]
                    required_artifact_ids.add(report.id)
                    required_artifact_ids.add(parent.id)
                    logging.debug('ASM: Match %s', report.parent)
                    logging.debug('ASR: %s %s', report.id, report.name)
                    logging.debug('ASP: %s %s', parent.id, parent.name)

        # Download and add required artifacts.
        for i in required_artifact_ids:
            blob = self.gh.download_artifact(i)
            if blob:
                self.db.add_sizes_from_zipfile(io.BytesIO(blob),
                                               {'artifact': i})

    def read_inputs(self):
        """Read size report from github and/or local files."""
        if self.gh:
            self.add_sizes_from_github()
        for filename in self.config['args.inputs']:
            self.db.add_sizes_from_file(filename)
        self.db.commit()
        return self

    def should_report(self, event: str = '') -> bool:
        """Return true if reporting is enabled for the action event."""
        if not event:
            return self.config['report.pr'] or self.config['report.push']
        if event == 'pull_request':
            return self.config['report.pr']
        return self.config['report.push']

    def get_existing_comment(self, pr: int, title: str):
        """Check for an existing comment."""
        existing_comment = None
        existing_comment_format = 0
        for comment in self.gh.get_comments_for_pr(pr):
            comment_parts = comment.body.partition('\n')
            if comment_parts[0].strip() == title:
                existing_comment = comment
                if m := self.comment_format_re.match(comment_parts[2]):
                    existing_comment_format = int(m.group(1))
                break
        return (existing_comment, existing_comment_format)

    def get_newest_commit(self, pr: int) -> str:
        """Get the hash of the most recent commit on the PR."""
        commits = sorted(
            self.gh.get_commits_for_pr(pr),
            key=lambda c: f'{c.commit.committer.date}{c.commit.author.date}',
            reverse=True)
        return commits[0].sha if commits else ''

    def post_change_report(self, df: pd.DataFrame) -> bool:
        """Send a change report as a github comment."""
        if not self.gh:
            return False

        pr = df.attrs['pr']

        # Check for an existing size report comment. If one exists, we'll add
        # the new information to it.
        existing_comment, existing_comment_format = self.get_existing_comment(
            pr, df.attrs['title'])

        if not existing_comment:
            # Check the most recent commit on the PR, so that we don't comment
            # for commits that are already outdated.
            commit = df.attrs['commit']
            latest = self.get_newest_commit(pr)
            if commit != latest:
                logging.info(
                    'SCS: PR #%s: not commenting for stale %s; newest is %s',
                    pr, commit, latest)
                # Return True so that the obsolete artifacts get removed.
                return True

        if existing_comment_format == 1:
            df = V1Comment.merge(df, existing_comment)
        else:
            existing_comment = None
        text = V1Comment.format(self.config, df)

        if existing_comment:
            return self.gh.update_comment(existing_comment.id, text)
        return self.gh.create_comment(pr, text)

    def report_matching_commits(self) -> Dict[str, pd.DataFrame]:
        """Report on all new comparable commits."""
        if not self.should_report():
            return {}

        comment_count = 0
        comment_limit = self.config['github.limit-comments']
        comment_enabled = (self.config['github.comment']
                           or self.config['github.dryrun-comment'])
        only_pr = self.config['github.limit-pr']

        dfs = {}
        commits = self.db.select_matching_commits()
        for event, pr, commit, parent in commits.fetchall():
            if not self.should_report(event):
                continue

            # Github doesn't have a way to fetch artifacts associated with a
            # particular PR. For testing purposes, filter to a single PR here.
            if only_pr and pr != only_pr:
                continue

            changes = self.db.select_changes(parent, commit)
            self.db.delete_builds(changes.stale_builds)
            self.gh.delete_artifacts(changes.stale_artifacts)
            if not changes.rows:
                # Matching commits had no new matching builds.
                continue

            df = pd.DataFrame(changes.rows, columns=changes.columns)
            df.attrs = {
                'name': f'{pr},{parent},{commit}',
                'title': (f'PR #{pr}: ' if pr else '')
                + f'Size comparison from {parent} to {commit}',
                'things': changes.things,
                'builds': changes.builds,
                'artifacts': changes.artifacts,
                'pr': pr,
                'commit': commit,
                'parent': parent,
            }
            dfs[df.attrs['name']] = df

            if (event == 'pull_request' and comment_enabled
                    and (comment_limit == 0
                         or comment_limit > comment_count)):
                if self.post_change_report(df):
                    # Mark the originating builds, and remove the originating
                    # artifacts, so that they don't generate duplicate report
                    # comments.
                    self.db.set_commented(df.attrs['builds'])
                    self.gh.delete_artifacts(df.attrs['artifacts'])
                    comment_count += 1

        return dfs


class V1Comment:
    """Format of a GitHub comment."""

    @staticmethod
    def format(config: Config, df: pd.DataFrame):
        """Format a GitHub comment."""
        threshold_df = None
        increase_df = df[df['change'] > 0]
        if increase_df.empty:
            increase_df = None
        elif threshold := config['report.increases']:
            threshold_df = df[df['% change'] > threshold]
            if threshold_df.empty:
                threshold_df = None
        decrease_df = df[df['change'] < 0]
        if decrease_df.empty:
            decrease_df = None

        with io.StringIO() as md:
            md.write(df.attrs['title'])
            # Format-version marker, matched by SizeContext.comment_format_re
            # when looking for an existing report comment to update.
            md.write('\n<!--fmt:1-->\n\n')

            if threshold_df is not None:
                md.write(f'**Increases above {threshold:.2g}%:**\n\n')
                md.write('\n\n')
                V1Comment.write_df(config, threshold_df, md)

            # Each table goes inside a <details> block so that GitHub renders
            # it as a collapsible section.
            if increase_df is not None:
                summary = V1Comment.summary(increase_df)
                md.write('<details>\n')
                md.write(f'<summary>Increases ({summary})</summary>\n')
                md.write('\n\n')
                V1Comment.write_df(config, increase_df, md)
                md.write('</details>\n\n')

            if decrease_df is not None:
                summary = V1Comment.summary(decrease_df)
                md.write('<details>\n')
                md.write(f'<summary>Decreases ({summary})</summary>\n')
                md.write('\n\n')
                V1Comment.write_df(config, decrease_df, md)
                md.write('</details>\n\n')

            summary = V1Comment.summary(df)
            md.write('<details>\n')
            md.write(f'<summary>Full report ({summary})</summary>\n')
            md.write('\n\n')
            V1Comment.write_df(config, df, md)
            md.write('\n</details>\n')

            return md.getvalue()

    @staticmethod
    def summary(df: pd.DataFrame) -> str:
        count = df[['platform', 'target',
                    'config']].drop_duplicates().shape[0]
        platforms = ', '.join(sorted(list(set(df['platform']))))
        return f'{count} build{"" if count == 1 else "s"} for {platforms}'

    @staticmethod
    def write_df(config: Config, df: pd.DataFrame,
                 out: memdf.report.OutputOption):
        memdf.report.write_df(config, df, out,
                              'pipe',
                              hierify=True,
                              title=False,
                              floatfmt='5.1f')

    @staticmethod
    def merge(df: pd.DataFrame, comment) -> pd.DataFrame:
        """Merge an existing comment into the DataFrame."""
        with io.StringIO(comment.body) as body:
            for line in body:
                # The full-report table follows its <summary> line and one
                # blank line in the comment written by V1Comment.format().
                if line.startswith('<summary>Full report'):
                    body.readline()  # Blank line before table.
                    cols, rows = memdf.util.markdown.read_hierified(body)
                    break
        logging.debug('REC: read %d rows', len(rows))
        attrs = df.attrs
        df = pd.concat(
            [df, pd.DataFrame(data=rows, columns=cols).astype(df.dtypes)],
            ignore_index=True)
        df.attrs = attrs
        return df.sort_values(
            by=['platform', 'target', 'config', 'section']).drop_duplicates()


def main(argv):
    status = 0
    try:
        config = Config().init({
            **memdf.util.config.CONFIG,
            **memdf.util.github.CONFIG,
            **memdf.util.sqlite.CONFIG,
            **memdf.report.OUTPUT_CONFIG,
            **GITHUB_CONFIG,
            **DB_CONFIG,
            **REPORT_CONFIG,
        })
        config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
        config.parse(argv)

        szc = SizeContext(config)
        szc.read_inputs()
        dfs = szc.report_matching_commits()
        memdf.report.write_dfs(config, dfs,
                               hierify=True,
                               title=True,
                               floatfmt='5.1f')

    except Exception as exception:
        raise exception

    return status


if __name__ == '__main__':
    sys.exit(main(sys.argv))