||
- #!/usr/bin/env python3
- #
- # Copyright (c) 2020 Project CHIP Authors
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- import argparse
- import csv
- import datetime
- import io
- import logging
- import os
- import re
- import stat
- import subprocess
- import traceback
- import zipfile
- import coloredlogs
- import github
- import github_fetch_artifacts
# Retention windows, in days, used when pruning old artifacts.
LOG_KEEP_DAYS = 3
BINARY_KEEP_DAYS = 30

# Upper bound on the number of binary (non-log) artifacts that are kept.
#
# The limit is fairly generous because a single build produces several
# artifacts. As of Sep 2020 each build has 4 of them:
#   gn-nrf, gn-linux, examples-esp32, example-nrf
#
# The non-gn variants should eventually be removed to save space.
BINARY_MAX_COUNT = 80
class SectionChange:
    """Size delta recorded for one named section.

    The constructor stores its three arguments unchanged as the attributes
    ``section``, ``fileChange`` and ``vmChange``.
    """

    def __init__(self, section, fileChange, vmChange):
        # Plain value holder: no validation or conversion happens here.
        self.section, self.fileChange, self.vmChange = (
            section, fileChange, vmChange)
class ComparisonResult:
    """Holds the section changes collected for one compared file."""

    def __init__(self, name):
        # Starts empty; entries are appended after construction.
        self.sectionChanges = list()
        self.fileName = name
# Section names that are always reported when they appear in the bloaty
# output, even if their VM size delta is zero.
SECTIONS_TO_WATCH = {
    '.rodata',
    '.text',
    '.flash.rodata',
    '.flash.text',
    '.bss',
    '.data',
}
def filesInDirectory(dirName):
    """Yield the name of every regular file directly inside dirName.

    Symbolic links are followed, so a link pointing at a regular file is
    reported. Entries that cannot be inspected (for example a dangling
    symlink, or a file removed while the directory is being listed) are
    skipped instead of aborting the whole listing with an OSError.

    Args:
        dirName: path of the directory to list (not recursive).

    Yields:
        Bare file names (no directory component), in os.listdir() order.

    Raises:
        OSError: if dirName itself cannot be listed.
    """
    for name in os.listdir(dirName):
        # os.path.isfile() applies the same stat()-based regular-file test as
        # an explicit S_ISREG check, but returns False rather than raising
        # when the stat() call fails.
        if os.path.isfile(os.path.join(dirName, name)):
            yield name
def writeFileBloatReport(f, baselineName, buildName):
    """Write a bloaty diff of one build output against its baseline.

    Runs ``bloaty --csv buildName -- baselineName``, copies the raw output
    (stdout and stderr combined) into the report and then parses that output
    as CSV rows of ``section, VM size delta, file size delta``.

    Args:
        f: writable text file object that receives the report text.
        baselineName: path of the baseline binary.
        buildName: path of the freshly built binary.

    Returns:
        A ComparisonResult named after the basename of buildName. Its
        sectionChanges list holds one SectionChange for every row whose
        section is in SECTIONS_TO_WATCH or whose VM column is neither '0'
        nor the 'vmsize' header. The list is empty or partial when the
        output cannot be parsed (e.g. because bloaty failed).
    """
    logging.info('Running bloaty diff between %s and %s',
                 baselineName, buildName)
    f.write('Comparing %s and %s:\n\n' % (baselineName, buildName))

    process = subprocess.run(
        ['bloaty', '--csv', buildName, '--', baselineName],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if process.returncode != 0:
        logging.warning('Bloaty execution failed: %d', process.returncode)
        f.write('BLOAT EXECUTION FAILED WITH CODE %d:\n' % process.returncode)

    # Undecodable bytes are replaced so that odd tool output cannot abort
    # the report with a UnicodeDecodeError.
    content = process.stdout.decode('utf8', errors='replace')
    f.write(content)
    f.write('\n')

    result = ComparisonResult(os.path.basename(buildName))
    try:
        for row in csv.reader(io.StringIO(content)):
            # Distinct names: the report file object `f` must not be
            # shadowed by the file-size column.
            section, vmChange, fileChange = row
            watched = section in SECTIONS_TO_WATCH
            if watched or vmChange not in ('0', 'vmsize'):
                result.sectionChanges.append(
                    SectionChange(section, int(fileChange), int(vmChange)))
    except (ValueError, csv.Error) as err:
        # Best effort: keep whatever was parsed so far, but make the failure
        # visible instead of silently discarding it. A row with the wrong
        # number of columns or a non-numeric size ends up here.
        logging.warning('Stopped parsing bloaty output for %s: %s',
                        buildName, err)
    return result
def generateBloatReport(outputFileName,
                        baselineDir,
                        buildOutputDir,
                        title='BLOAT REPORT'):
    """Generate a bloat report for the files shared by two directories.

    The report starts with the title, then lists the files that exist in
    only one of the two directories, and finally contains one bloaty diff
    per file name present in both. Names are processed in sorted order so
    the report is reproducible.

    Args:
        outputFileName: path of the text report to (over)write.
        baselineDir: directory holding the baseline binaries.
        buildOutputDir: directory holding the newly built binaries.
        title: first line of the report.

    Returns:
        A list with one writeFileBloatReport() result per file name found
        in both directories.
    """
    logging.info('Generating bloat diff report between %s and %s', baselineDir,
                 buildOutputDir)

    # List both directories before the report file is created.
    baselineNames = set(filesInDirectory(baselineDir))
    outputNames = set(filesInDirectory(buildOutputDir))

    # The report may be written into one of the compared directories; it is
    # not a build product and must not be listed as a file that exists on
    # one side only.
    reportDir, reportName = os.path.split(os.path.abspath(outputFileName))
    if reportDir == os.path.abspath(baselineDir):
        baselineNames.discard(reportName)
    if reportDir == os.path.abspath(buildOutputDir):
        outputNames.discard(reportName)

    with open(outputFileName, 'wt') as f:
        f.write(title + '\n\n')

        baselineOnly = sorted(baselineNames - outputNames)
        if baselineOnly:
            logging.warning(
                'Some files only exist in the baseline: %r', baselineOnly)
            f.write('Files found only in the baseline:\n ')
            f.write('\n '.join(baselineOnly))
            f.write('\n\n')

        outputOnly = sorted(outputNames - baselineNames)
        if outputOnly:
            logging.warning('Some files only exist in the build output: %r',
                            outputOnly)
            f.write('Files found only in the build output:\n ')
            f.write('\n '.join(outputOnly))
            f.write('\n\n')

        results = []
        for name in sorted(baselineNames & outputNames):
            results.append(
                writeFileBloatReport(f, os.path.join(baselineDir, name),
                                     os.path.join(buildOutputDir, name)))
        return results
def sendFileAsPrComment(job_name, filename, gh_token, gh_repo, gh_pr_number,
                        compare_results, base_sha):
    """Post the size report of a job as a comment on a pull request.

    Every existing comment on the pull request whose body starts with this
    job's title heading is deleted first. A new comment is then posted only
    if at least one compared file has section changes.

    Args:
        job_name: name embedded in the comment's title heading.
        filename: path of the text report embedded in the comment.
        gh_token: GitHub API token.
        gh_repo: repository identifier handed to Github.get_repo().
        gh_pr_number: number of the pull request to comment on.
        compare_results: list of objects exposing fileName and
            sectionChanges (iterated more than once).
        base_sha: base SHA shown in the first line of the comment.
    """
    logging.info('Uploading report to "%s", PR %d', gh_repo, gh_pr_number)

    # Context manager so the report file handle is closed deterministically.
    with open(filename, 'rt') as reportFile:
        rawText = reportFile.read()

    # a consistent title to help identify obsolete comments
    titleHeading = 'Size increase report for "{jobName}"'.format(
        jobName=job_name)

    api = github.Github(gh_token)
    repo = api.get_repo(gh_repo)
    pull = repo.get_pull(gh_pr_number)

    for comment in pull.get_issue_comments():
        if not comment.body.startswith(titleHeading):
            continue
        logging.info(
            'Removing obsolete comment with heading "%s"', titleHeading)
        comment.delete()

    if all(len(result.sectionChanges) == 0 for result in compare_results):
        logging.info('No results to report')
        return

    tableLines = ['File | Section | File | VM\n---- | ---- | ----- | ---- \n']
    for result in compare_results:
        for change in result.sectionChanges:
            tableLines.append('{0} | {1} | {2} | {3}\n'.format(
                result.fileName, change.section, change.fileChange,
                change.vmChange))
    compareTable = ''.join(tableLines)

    # Blank lines separate the Markdown table and the fenced block from the
    # surrounding text/HTML: inside an HTML block such as <details>,
    # Markdown is only parsed again after a blank line. compareTable already
    # ends with a newline, so joining yields the blank line before <details>.
    body = '\n'.join([
        '{title} from {baseSha}'.format(title=titleHeading, baseSha=base_sha),
        '',
        compareTable,
        '<details>',
        '<summary>Full report output</summary>',
        '',
        '```',
        rawText,
        '```',
        '',
        '</details>',
        '',
    ])

    # NOTE: PRs are issues with attached patches, hence the API naming
    pull.create_issue_comment(body)
def getPullRequestBaseSha(githubToken, githubRepo, pullRequestNumber):
    """Return the SHA recorded as the base of the given pull request."""
    pullRequest = (
        github.Github(githubToken)
        .get_repo(githubRepo)
        .get_pull(pullRequestNumber)
    )
    return pullRequest.base.sha
def cleanDir(name):
    """Ensure a directory called name exists and contains no regular files.

    A missing directory is created together with any missing parent
    directories. In an existing directory only regular files (and symlinks
    to them) are removed; sub-directories and their content are left
    untouched.

    Args:
        name: path of the directory to create or empty.

    Raises:
        OSError: if name exists but is not a directory, or if creating the
            directory or removing a file fails.
    """
    # makedirs(exist_ok=True) avoids the check-then-create race and also
    # works when parent directories do not exist yet.
    os.makedirs(name, exist_ok=True)
    for fname in os.listdir(name):
        path = os.path.join(name, fname)
        if os.path.isfile(path):
            os.unlink(path)
def downloadArtifact(artifact, dirName):
    """Download an artifact and unpack its zip content into dirName.

    Args:
        artifact: object whose downloadBlob() returns the bytes of a zip
            archive.
        dirName: directory the archive members are extracted into.

    Raises:
        zipfile.BadZipFile: if the downloaded blob is not a valid zip
            archive.
    """
    blob = artifact.downloadBlob()
    # The context manager closes the archive even when extraction fails.
    with zipfile.ZipFile(io.BytesIO(blob), 'r') as zipFile:
        logging.info('Extracting zip file to %r', dirName)
        zipFile.extractall(dirName)
def main():
    """Prune old build artifacts and post bloat reports for PR artifacts.

    Artifacts are processed newest first:

    * artifacts whose name starts with 'Size,' are ignored;
    * artifacts older than BINARY_KEEP_DAYS, logs older than LOG_KEEP_DAYS
      and binary artifacts beyond the newest BINARY_MAX_COUNT are deleted;
    * a non-log artifact whose name was already seen is deleted;
    * for an artifact named '<prefix>-pull-<number>' the artifact named
      '<prefix>-<base sha of that pull request>' is looked up, both are
      downloaded and diffed, the report is posted as a PR comment and the
      pull artifact is deleted afterwards.

    A failure while processing one pull request is logged and does not stop
    the processing of the remaining artifacts.
    """
    parser = argparse.ArgumentParser(
        description='Fetch master build artifacts.')
    parser.add_argument(
        '--output-dir',
        type=str,
        default='.',
        help='Where to download the artifacts')
    parser.add_argument(
        '--github-api-token',
        type=str,
        help='Github API token to upload the report as a comment')
    parser.add_argument(
        '--github-repository', type=str, help='Repository to use for PR comments')
    parser.add_argument(
        '--log-level',
        default=logging.INFO,
        type=lambda x: getattr(logging, x),
        help='Configure the logging level.')
    args = parser.parse_args()

    # Ensures somewhat pretty logging of what is going on
    logging.basicConfig(
        level=args.log_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    coloredlogs.install()

    if not args.github_api_token:
        logging.error(
            'Required arguments missing: github api token is required.')
        return
    # The repository is handed to every GitHub call below, so it is just as
    # mandatory as the token.
    if not args.github_repository:
        logging.error(
            'Required arguments missing: github repository is required.')
        return

    # all known artifacts
    artifacts = list(github_fetch_artifacts.getAllArtifacts(
        args.github_api_token, args.github_repository))

    # process newest artifacts first
    artifacts.sort(key=lambda x: x.created_at, reverse=True)

    # NOTE(review): this is naive local time; presumably created_at is a
    # naive timestamp in the same timezone - confirm against
    # github_fetch_artifacts.
    current_time = datetime.datetime.now()
    seen_names = set()
    pull_artifact_re = re.compile(r'^(.*)-pull-(\d+)$')
    binary_count = 0

    for a in artifacts:
        # Ignore size reports; they are handled by a separate script.
        if a.name.startswith('Size,'):
            continue

        is_log = a.name.endswith('-logs')
        if not is_log:
            binary_count += 1

        age_days = (current_time - a.created_at).days
        # Do not keep any artifact forever
        too_old = age_days > BINARY_KEEP_DAYS
        # Keep a maximum number of binary packages
        over_quota = not is_log and binary_count > BINARY_MAX_COUNT
        # Logs are kept even shorter
        stale_log = is_log and age_days > LOG_KEEP_DAYS

        if too_old or over_quota or stale_log:
            logging.info('Old artifact: %s from %r', a.name, a.created_at)
            a.delete()
            continue

        if is_log:
            # logs names are duplicate, however that is fine
            continue

        if a.name in seen_names:
            logging.info('Artifact name already seen before: %s', a.name)
            a.delete()
            continue
        seen_names.add(a.name)

        m = pull_artifact_re.match(a.name)
        if not m:
            logging.info('Non-PR artifact found: %r from %r',
                         a.name, a.created_at)
            continue

        prefix = m.group(1)
        pull_number = int(m.group(2))
        logging.info('Processing PR %s via artifact %r', pull_number, a.name)

        try:
            base_sha = getPullRequestBaseSha(
                args.github_api_token, args.github_repository, pull_number)
            base_artifact_name = '%s-%s' % (prefix, base_sha)

            # NOTE(review): this searches the full list, which still holds
            # entries deleted earlier in this loop.
            base_artifacts = [
                v for v in artifacts if v.name == base_artifact_name]
            if len(base_artifacts) != 1:
                raise Exception('Did not find exactly one artifact for %s: %r' % (
                    base_artifact_name, [v.name for v in base_artifacts]))

            b = base_artifacts[0]
            logging.info('Diff will be against artifact %r', b.name)

            aOutput = os.path.join(args.output_dir, 'pull_artifact')
            bOutput = os.path.join(args.output_dir, 'master_artifact')
            cleanDir(aOutput)
            cleanDir(bOutput)

            downloadArtifact(a, aOutput)
            downloadArtifact(b, bOutput)

            report_name = os.path.join(aOutput, 'report.csv')
            results = generateBloatReport(report_name, bOutput, aOutput)
            sendFileAsPrComment(prefix, report_name, args.github_api_token,
                                args.github_repository, pull_number, results,
                                base_sha)

            # If running over a top level directory, ensure git sees no output
            cleanDir(aOutput)
            cleanDir(bOutput)

            # Output processed.
            a.delete()
        except Exception:
            # Top-level boundary for one pull request: log the traceback and
            # carry on with the next artifact.
            tb = traceback.format_exc()
            logging.warning('Failed to process bloat report: %s', tb)


if __name__ == '__main__':
    # execute only if run as a script
    main()
|