check_codeowners.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. #!/usr/bin/env python
  2. #
  3. # Utility script for ESP-IDF developers to work with the CODEOWNERS file.
  4. #
  5. # Copyright 2020 Espressif Systems (Shanghai) PTE LTD
  6. #
  7. # Licensed under the Apache License, Version 2.0 (the "License");
  8. # you may not use this file except in compliance with the License.
  9. # You may obtain a copy of the License at
  10. #
  11. # http://www.apache.org/licenses/LICENSE-2.0
  12. #
  13. # Unless required by applicable law or agreed to in writing, software
  14. # distributed under the License is distributed on an "AS IS" BASIS,
  15. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. # See the License for the specific language governing permissions and
  17. # limitations under the License.
  18. import argparse
  19. import os
  20. import re
  21. import subprocess
  22. import sys
  23. from idf_ci_utils import IDF_PATH
# Location of the CODEOWNERS file inside the IDF repository
CODEOWNERS_PATH = os.path.join(IDF_PATH, '.gitlab', 'CODEOWNERS')
# Prefix which every owner name is expected to start with (verified by the ci-check action)
CODEOWNER_GROUP_PREFIX = '@esp-idf-codeowners/'
  26. def get_all_files():
  27. """
  28. Get list of all file paths in the repository.
  29. """
  30. # only split on newlines, since file names may contain spaces
  31. return subprocess.check_output(['git', 'ls-files'], cwd=IDF_PATH).decode('utf-8').strip().split('\n')
  32. def pattern_to_regex(pattern):
  33. """
  34. Convert the CODEOWNERS path pattern into a regular expression string.
  35. """
  36. orig_pattern = pattern # for printing errors later
  37. # Replicates the logic from normalize_pattern function in Gitlab ee/lib/gitlab/code_owners/file.rb:
  38. if not pattern.startswith('/'):
  39. pattern = '/**/' + pattern
  40. if pattern.endswith('/'):
  41. pattern = pattern + '**/*'
  42. # Convert the glob pattern into a regular expression:
  43. # first into intermediate tokens
  44. pattern = (pattern.replace('**/', ':REGLOB:')
  45. .replace('**', ':INVALID:')
  46. .replace('*', ':GLOB:')
  47. .replace('.', ':DOT:')
  48. .replace('?', ':ANY:'))
  49. if pattern.find(':INVALID:') >= 0:
  50. raise ValueError("Likely invalid pattern '{}': '**' should be followed by '/'".format(orig_pattern))
  51. # then into the final regex pattern:
  52. re_pattern = (pattern.replace(':REGLOB:', '(?:.*/)?')
  53. .replace(':GLOB:', '[^/]*')
  54. .replace(':DOT:', '[.]')
  55. .replace(':ANY:', '.') + '$')
  56. if re_pattern.startswith('/'):
  57. re_pattern = '^' + re_pattern
  58. return re_pattern
  59. def files_by_regex(all_files, regex):
  60. """
  61. Return all files in the repository matching the given regular expresion.
  62. """
  63. return [file for file in all_files if regex.search('/' + file)]
  64. def files_by_pattern(all_files, pattern=None):
  65. """
  66. Return all the files in the repository matching the given CODEOWNERS pattern.
  67. """
  68. if not pattern:
  69. return all_files
  70. return files_by_regex(all_files, re.compile(pattern_to_regex(pattern)))
  71. def action_identify(args):
  72. best_match = []
  73. all_files = get_all_files()
  74. with open(CODEOWNERS_PATH) as f:
  75. for line in f:
  76. line = line.strip()
  77. if not line or line.startswith('#'):
  78. continue
  79. tokens = line.split()
  80. path_pattern = tokens[0]
  81. owners = tokens[1:]
  82. files = files_by_pattern(all_files, path_pattern)
  83. if args.path in files:
  84. best_match = owners
  85. for owner in best_match:
  86. print(owner)
  87. def action_test_pattern(args):
  88. re_pattern = pattern_to_regex(args.pattern)
  89. if args.regex:
  90. print(re_pattern)
  91. return
  92. files = files_by_regex(get_all_files(), re.compile(re_pattern))
  93. for f in files:
  94. print(f)
  95. def action_ci_check(args):
  96. errors = []
  97. def add_error(msg):
  98. errors.append('{}:{}: {}'.format(CODEOWNERS_PATH, line_no, msg))
  99. all_files = get_all_files()
  100. prev_path_pattern = ''
  101. with open(CODEOWNERS_PATH) as f:
  102. for line_no, line in enumerate(f, start=1):
  103. # Skip empty lines and comments
  104. line = line.strip()
  105. if line.startswith('# sort-order-reset'):
  106. prev_path_pattern = ''
  107. if not line or line.startswith('#'):
  108. continue
  109. # Each line has a form of "<path> <owners>+"
  110. tokens = line.split()
  111. path_pattern = tokens[0]
  112. owners = tokens[1:]
  113. if not owners:
  114. add_error('no owners specified for {}'.format(path_pattern))
  115. # Check that the file is sorted by path patterns
  116. path_pattern_for_cmp = path_pattern.replace('-', '_') # ignore difference between _ and - for ordering
  117. if prev_path_pattern and path_pattern_for_cmp < prev_path_pattern:
  118. add_error('file is not sorted: {} < {}'.format(path_pattern_for_cmp, prev_path_pattern))
  119. prev_path_pattern = path_pattern_for_cmp
  120. # Check that the pattern matches at least one file
  121. files = files_by_pattern(all_files, path_pattern)
  122. if not files:
  123. add_error('no files matched by pattern {}'.format(path_pattern))
  124. for o in owners:
  125. # Sanity-check the owner group name
  126. if not o.startswith(CODEOWNER_GROUP_PREFIX):
  127. add_error("owner {} doesn't start with {}".format(o, CODEOWNER_GROUP_PREFIX))
  128. if not errors:
  129. print('No errors found.')
  130. else:
  131. print('Errors found!')
  132. for e in errors:
  133. print(e)
  134. raise SystemExit(1)
  135. def main():
  136. parser = argparse.ArgumentParser(
  137. sys.argv[0], description='Internal helper script for working with the CODEOWNERS file.'
  138. )
  139. subparsers = parser.add_subparsers(dest='action')
  140. identify = subparsers.add_parser(
  141. 'identify',
  142. help='List the owners of the specified path within IDF.'
  143. "This command doesn't support files inside submodules, or files not added to git repository.",
  144. )
  145. identify.add_argument('path', help='Path of the file relative to the root of the repository')
  146. subparsers.add_parser(
  147. 'ci-check',
  148. help='Check CODEOWNERS file: every line should match at least one file, sanity-check group names, '
  149. 'check that the file is sorted by paths',
  150. )
  151. test_pattern = subparsers.add_parser(
  152. 'test-pattern',
  153. help='Print files in the repository for a given CODEOWNERS pattern. Useful when adding new rules.'
  154. )
  155. test_pattern.add_argument('--regex', action='store_true', help='Print the equivalent regular expression instead of the file list.')
  156. test_pattern.add_argument('pattern', help='Path pattern to get the list of files for')
  157. args = parser.parse_args()
  158. if args.action is None:
  159. parser.print_help()
  160. parser.exit(1)
  161. action_func_name = 'action_' + args.action.replace('-', '_')
  162. action_func = globals()[action_func_name]
  163. action_func(args)
# Run the command line interface only when the file is executed as a script
if __name__ == '__main__':
    main()