bloat_check.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. #!/usr/bin/env python3
  2. #
  3. # Copyright (c) 2020 Project CHIP Authors
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. import argparse
  18. import csv
  19. import datetime
  20. import io
  21. import logging
  22. import os
  23. import re
  24. import stat
  25. import subprocess
  26. import traceback
  27. import zipfile
  28. import coloredlogs
  29. import github
  30. import github_fetch_artifacts
# Artifacts whose names end in '-logs' are deleted after this many days.
LOG_KEEP_DAYS = 3
# Binary (non-log) build artifacts are deleted after this many days.
BINARY_KEEP_DAYS = 30
# Count is reasonably large because each build has multiple artifacts
# Currently (Sep 2020) each build has 4 artifacts:
# gn-nrf, gn-linux, examples-esp32, example-nrf
#
# We should eventually remove the non-gn version to save space.
# Maximum number of binary artifacts kept regardless of age.
BINARY_MAX_COUNT = 80
  39. class SectionChange:
  40. """Describes delta changes to a specific section"""
  41. def __init__(self, section, fileChange, vmChange):
  42. self.section = section
  43. self.fileChange = fileChange
  44. self.vmChange = vmChange
  45. class ComparisonResult:
  46. """Comparison results for an entire file"""
  47. def __init__(self, name):
  48. self.fileName = name
  49. self.sectionChanges = []
  50. SECTIONS_TO_WATCH = set(
  51. ['.rodata', '.text', '.flash.rodata', '.flash.text', '.bss', '.data'])
  52. def filesInDirectory(dirName):
  53. """Get all the file names in the specified directory."""
  54. for name in os.listdir(dirName):
  55. mode = os.stat(os.path.join(dirName, name)).st_mode
  56. if stat.S_ISREG(mode):
  57. yield name
  58. def writeFileBloatReport(f, baselineName, buildName):
  59. """Generate a bloat report diffing a baseline file with a build output file."""
  60. logging.info('Running bloaty diff between %s and %s',
  61. baselineName, buildName)
  62. f.write('Comparing %s and %s:\n\n' % (baselineName, buildName))
  63. result = subprocess.run(
  64. ['bloaty', '--csv', buildName, '--', baselineName],
  65. stdout=subprocess.PIPE,
  66. stderr=subprocess.STDOUT,
  67. )
  68. if result.returncode != 0:
  69. logging.warning('Bloaty execution failed: %d', result.returncode)
  70. f.write('BLOAT EXECUTION FAILED WITH CODE %d:\n' % result.returncode)
  71. content = result.stdout.decode('utf8')
  72. f.write(content)
  73. f.write('\n')
  74. result = ComparisonResult(os.path.basename(buildName))
  75. try:
  76. reader = csv.reader(io.StringIO(content))
  77. for row in reader:
  78. section, vm, f = row
  79. if (section in SECTIONS_TO_WATCH) or (vm not in ['0', 'vmsize']):
  80. result.sectionChanges.append(
  81. SectionChange(section, int(f), int(vm)))
  82. except Exception:
  83. pass
  84. return result
  85. def generateBloatReport(outputFileName,
  86. baselineDir,
  87. buildOutputDir,
  88. title='BLOAT REPORT'):
  89. """Generates a bloat report fo files between two diferent directories."""
  90. logging.info('Generating bloat diff report between %s and %s', baselineDir,
  91. buildOutputDir)
  92. with open(outputFileName, 'wt') as f:
  93. f.write(title + '\n\n')
  94. baselineNames = set([name for name in filesInDirectory(baselineDir)])
  95. outputNames = set([name for name in filesInDirectory(buildOutputDir)])
  96. baselineOnly = baselineNames - outputNames
  97. if baselineOnly:
  98. logging.warning(
  99. 'Some files only exist in the baseline: %r', baselineOnly)
  100. f.write('Files found only in the baseline:\n ')
  101. f.write('\n %s'.join(baselineOnly))
  102. f.write('\n\n')
  103. outputOnly = outputNames - baselineNames
  104. if outputOnly:
  105. logging.warning('Some files only exist in the build output: %r',
  106. outputOnly)
  107. f.write('Files found only in the build output:\n ')
  108. f.write('\n %s'.join(outputOnly))
  109. f.write('\n\n')
  110. results = []
  111. for name in (baselineNames & outputNames):
  112. results.append(
  113. writeFileBloatReport(f, os.path.join(baselineDir, name),
  114. os.path.join(buildOutputDir, name)))
  115. return results
  116. def sendFileAsPrComment(job_name, filename, gh_token, gh_repo, gh_pr_number,
  117. compare_results, base_sha):
  118. """Generates a PR comment containing the specified file content."""
  119. logging.info('Uploading report to "%s", PR %d', gh_repo, gh_pr_number)
  120. rawText = open(filename, 'rt').read()
  121. # a consistent title to help identify obsolete comments
  122. titleHeading = 'Size increase report for "{jobName}"'.format(
  123. jobName=job_name)
  124. api = github.Github(gh_token)
  125. repo = api.get_repo(gh_repo)
  126. pull = repo.get_pull(gh_pr_number)
  127. for comment in pull.get_issue_comments():
  128. if not comment.body.startswith(titleHeading):
  129. continue
  130. logging.info(
  131. 'Removing obsolete comment with heading "%s"', (titleHeading))
  132. comment.delete()
  133. if all(len(file.sectionChanges) == 0 for file in compare_results):
  134. logging.info('No results to report')
  135. return
  136. compareTable = 'File | Section | File | VM\n---- | ---- | ----- | ---- \n'
  137. for file in compare_results:
  138. for change in file.sectionChanges:
  139. compareTable += '{0} | {1} | {2} | {3}\n'.format(file.fileName,
  140. change.section,
  141. change.fileChange,
  142. change.vmChange)
  143. # NOTE: PRs are issues with attached patches, hence the API naming
  144. pull.create_issue_comment("""{title} from {baseSha}
  145. {table}
  146. <details>
  147. <summary>Full report output</summary>
  148. ```
  149. {rawReportText}
  150. ```
  151. </details>
  152. """.format(title=titleHeading, baseSha=base_sha, table=compareTable, rawReportText=rawText))
  153. def getPullRequestBaseSha(githubToken, githubRepo, pullRequestNumber):
  154. """Figure out the SHA for the base of a pull request"""
  155. api = github.Github(githubToken)
  156. repo = api.get_repo(githubRepo)
  157. pull = repo.get_pull(pullRequestNumber)
  158. return pull.base.sha
  159. def cleanDir(name):
  160. """Ensures a clean directory with the given name exists. Only handles files"""
  161. if os.path.exists(name):
  162. for fname in os.listdir(name):
  163. path = os.path.join(name, fname)
  164. if os.path.isfile(path):
  165. os.unlink(path)
  166. else:
  167. os.mkdir(name)
  168. def downloadArtifact(artifact, dirName):
  169. """Extract an artifact into a directory."""
  170. zipFile = zipfile.ZipFile(io.BytesIO(artifact.downloadBlob()), 'r')
  171. logging.info('Extracting zip file to %r' % dirName)
  172. zipFile.extractall(dirName)
  173. def main():
  174. """Main task if executed standalone."""
  175. parser = argparse.ArgumentParser(
  176. description='Fetch master build artifacts.')
  177. parser.add_argument(
  178. '--output-dir',
  179. type=str,
  180. default='.',
  181. help='Where to download the artifacts')
  182. parser.add_argument(
  183. '--github-api-token',
  184. type=str,
  185. help='Github API token to upload the report as a comment')
  186. parser.add_argument(
  187. '--github-repository', type=str, help='Repository to use for PR comments')
  188. parser.add_argument(
  189. '--log-level',
  190. default=logging.INFO,
  191. type=lambda x: getattr(logging, x),
  192. help='Configure the logging level.')
  193. args = parser.parse_args()
  194. # Ensures somewhat pretty logging of what is going on
  195. logging.basicConfig(
  196. level=args.log_level,
  197. format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  198. coloredlogs.install()
  199. if not args.github_api_token:
  200. logging.error(
  201. 'Required arguments missing: github api token is required.')
  202. return
  203. # all known artifacts
  204. artifacts = [a for a in github_fetch_artifacts.getAllArtifacts(
  205. args.github_api_token, args.github_repository)]
  206. # process newest artifacts first
  207. artifacts.sort(key=lambda x: x.created_at, reverse=True)
  208. current_time = datetime.datetime.now()
  209. seen_names = set()
  210. pull_artifact_re = re.compile('^(.*)-pull-(\\d+)$')
  211. binary_count = 0
  212. for a in artifacts:
  213. # Ignore size reports; they are handled by a separate script.
  214. if a.name.startswith('Size,'):
  215. continue
  216. # logs cleanup after 3 days
  217. is_log = a.name.endswith('-logs')
  218. if not is_log:
  219. binary_count = binary_count + 1
  220. need_delete = False
  221. if (current_time - a.created_at).days > BINARY_KEEP_DAYS:
  222. # Do not keep binary builds forever
  223. need_delete = True
  224. elif not is_log and binary_count > BINARY_MAX_COUNT:
  225. # Keep a maximum number of binary packages
  226. need_delete = True
  227. elif is_log and (current_time - a.created_at).days > LOG_KEEP_DAYS:
  228. # Logs are kept even shorter
  229. need_delete = True
  230. if need_delete:
  231. logging.info('Old artifact: %s from %r' % (a.name, a.created_at))
  232. a.delete()
  233. continue
  234. if a.name.endswith('-logs'):
  235. # logs names are duplicate, however that is fine
  236. continue
  237. if a.name in seen_names:
  238. logging.info('Artifact name already seen before: %s' % a.name)
  239. a.delete()
  240. continue
  241. seen_names.add(a.name)
  242. m = pull_artifact_re.match(a.name)
  243. if not m:
  244. logging.info('Non-PR artifact found: %r from %r' %
  245. (a.name, a.created_at))
  246. continue
  247. prefix = m.group(1)
  248. pull_number = int(m.group(2))
  249. logging.info('Processing PR %s via artifact %r' %
  250. (pull_number, a.name))
  251. try:
  252. base_sha = getPullRequestBaseSha(
  253. args.github_api_token, args.github_repository, pull_number)
  254. base_artifact_name = '%s-%s' % (prefix, base_sha)
  255. base_artifacts = [
  256. v for v in artifacts if v.name == base_artifact_name]
  257. if len(base_artifacts) != 1:
  258. raise Exception('Did not find exactly one artifact for %s: %r' % (
  259. base_artifact_name, [v.name for v in base_artifacts]))
  260. b = base_artifacts[0]
  261. logging.info('Diff will be against artifact %r' % b.name)
  262. aOutput = os.path.join(args.output_dir, 'pull_artifact')
  263. bOutput = os.path.join(args.output_dir, 'master_artifact')
  264. cleanDir(aOutput)
  265. cleanDir(bOutput)
  266. downloadArtifact(a, aOutput)
  267. downloadArtifact(b, bOutput)
  268. report_name = os.path.join(aOutput, 'report.csv')
  269. results = generateBloatReport(report_name, bOutput, aOutput)
  270. sendFileAsPrComment(prefix, report_name, args.github_api_token,
  271. args.github_repository, pull_number, results, base_sha)
  272. # If running over a top level directory, ensure git sees no output
  273. cleanDir(aOutput)
  274. cleanDir(bOutput)
  275. # Output processed.
  276. a.delete()
  277. except Exception:
  278. tb = traceback.format_exc()
  279. logging.warning('Failed to process bloat report: %s', tb)
if __name__ == '__main__':
    # execute only if run as a script
    main()