check_copyright.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. #!/usr/bin/env python
  2. # SPDX-FileCopyrightText: 2021 Espressif Systems (Shanghai) CO LTD
  3. # SPDX-License-Identifier: Apache-2.0
  4. """
Check files for copyright headers:
- file not on ignore list:
    - old Espressif copyright -> replace with SPDX
    - SPDX with invalid year or old company name -> replace with valid SPDX
    - other SPDX copyright -> PASS
    - non-SPDX copyright -> FAIL
    - no copyright -> insert Espressif copyright
- file on ignore list:
    - old Espressif copyright -> replace with SPDX, remove from ignore list
    - SPDX with invalid year or company format -> replace with valid SPDX and remove from ignore list
    - else -> keep on ignore list
  16. """
  17. import argparse
  18. import datetime
  19. import fnmatch
  20. import os
  21. import re
  22. import sys
  23. import textwrap
  24. from typing import List, Tuple
  25. from comment_parser import comment_parser
  26. from comment_parser.parsers.common import Comment
  27. from thefuzz import fuzz
# Base directory used to locate the ignore lists: the IDF_PATH environment variable,
# or the current working directory when the variable is not set.
IDF_PATH = os.getenv('IDF_PATH', os.getcwd())
# List of file names that are checked but whose failure alone does not abort the commit;
# check_copyrights() rewrites this file when entries are removed.
IGNORE_LIST_FN = os.path.join(IDF_PATH, 'tools/ci/check_copyright_ignore.txt')
# List of fnmatch patterns; files matching any of them are skipped entirely by check_copyrights().
PERMANENT_IGNORE_LIST_FN = os.path.join(IDF_PATH, 'tools/ci/check_copyright_permanent_ignore.txt')
# Explanation printed by main() when at least one file failed the check.
# The {example} placeholder is filled with a sample SPDX header.
CHECK_FAIL_MESSAGE = textwrap.dedent('''\
To make a file, not on the ignore list to pass the test it needs to contain both:
an SPDX-FileCopyrightText and
an SPDX-License-Identifier. For example:
{example}
More information about SPDX license identifiers can be found here:
https://spdx.github.io/spdx-spec/appendix-V-using-SPDX-short-identifiers-in-source-files/
To have this hook automatically insert the standard Espressif copyright notice,
ensure the word "copyright" is not in any comment up to line 30 and the file is not on the ignore list.
Below is a list of files, which failed the copyright check.
Files prefixed with "(ignore)" are on the ignore list and their presence alone won't cause the check to fail.
''')
# Explanation printed by main() after the list of files modified by this script.
CHECK_MODIFY_MESSAGE = textwrap.dedent('''\
Above is a list of files, which were modified. Please check their contents, stage them and run the commit again!
Files prefixed with "(ignore)" were on the ignore list at the time of invoking this script.
They may have been removed if noted above.
Pre-commit's option --show-diff-on-failure may be used to show a diff when hooks modify files.
''')
# Reference text of the old (pre-SPDX) Apache-2.0 header. detect_old_header_style() fuzzy-matches
# the leading comments of a file against it, and replace_copyright() uses its line count
# to decide how many lines to remove.
OLD_APACHE_HEADER = textwrap.dedent('''\
Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
''')
# New SPDX headers inserted into files; {years} is filled by format_years().
# Full header used for Python files.
NEW_APACHE_HEADER_PYTHON = textwrap.dedent('''\
# SPDX-FileCopyrightText: {years} Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
''')
# Single-line notice templates used by has_valid_copyright() to rewrite an existing
# SPDX-FileCopyrightText line in place: Python comment, line inside a multi-line
# comment, and single-line '//' comment respectively.
PYTHON_NOTICE = '# SPDX-FileCopyrightText: {years} Espressif Systems (Shanghai) CO LTD'
NOTICE_MULTILINE = ' * SPDX-FileCopyrightText: {years} Espressif Systems (Shanghai) CO LTD'
NOTICE = '// SPDX-FileCopyrightText: {years} Espressif Systems (Shanghai) CO LTD'
# Full header used for every non-Python file type.
# NOTE(review): NOTICE_MULTILINE above starts with ' * ' while the lines below start with '*';
# leading whitespace may have been lost in this copy of the file - confirm against the repository.
NEW_APACHE_HEADER = textwrap.dedent('''\
/*
* SPDX-FileCopyrightText: {years} Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
''')
# MIME types handed to comment_parser, keyed by a short language name (see get_file_mime()).
MIME = {
    'python': 'text/x-python',
    'c': 'text/x-c',
    'cpp': 'text/x-c++'
}
# terminal color output (ANSI escape sequences)
TERMINAL_RESET = '\33[0m'
TERMINAL_YELLOW = '\33[93m'
TERMINAL_GREEN = '\33[92m'
TERMINAL_RED = '\33[91m'
TERMINAL_GRAY = '\33[90m'
  89. class UnsupportedFileType(Exception):
  90. """Exception raised for unsupported file types.
  91. Attributes:
  92. file_name -- input file which caused the error
  93. message -- explanation of the error
  94. """
  95. def __init__(self, file_name: str, message: str='this file type is not supported') -> None:
  96. self.fine_name = file_name
  97. self.message = message
  98. super().__init__(self.message)
  99. def __str__(self) -> str:
  100. return f'{self.fine_name}: {self.message}'
  101. class NotFound(Exception):
  102. """Exception raised when something is not found.
  103. Attributes:
  104. thing -- what was not found
  105. """
  106. def __init__(self, thing: str='something') -> None:
  107. self.thing = thing
  108. super().__init__(self.thing)
  109. def __str__(self) -> str:
  110. return f'{self.thing} was not found'
  111. class CustomFile():
  112. """
  113. Custom data object to hold file name and if it's on the ignore list
  114. and to make it easier to print
  115. """
  116. def __init__(self, file_name: str, is_on_ignore_list: bool) -> None:
  117. self.file_name = file_name
  118. self.is_on_ignore_list = is_on_ignore_list
  119. def __str__(self) -> str:
  120. if self.is_on_ignore_list:
  121. return f'(ignore) {self.file_name}'
  122. return f' {self.file_name}'
  123. def get_file_mime(fn: str) -> str:
  124. """
  125. Return the mime type based on file's extension
  126. """
  127. if fn.endswith('.py'):
  128. return MIME['python']
  129. if fn.endswith(('.cpp', '.hpp')):
  130. return MIME['cpp']
  131. if fn.endswith(('.c', '.h', '.ld')):
  132. return MIME['c']
  133. raise UnsupportedFileType(fn)
  134. def get_comments(code: str, mime: str) -> list:
  135. """
  136. Extracts all comments from source code and does a multiline split
  137. """
  138. comments = comment_parser.extract_comments_from_str(code, mime)
  139. new_comments = []
  140. for comment in comments:
  141. if comment.is_multiline():
  142. comment_lines = comment.text().splitlines()
  143. for line_number, line in enumerate(comment_lines, start=comment.line_number()):
  144. new_comments.append(Comment(line, line_number, True))
  145. else:
  146. new_comments.append(comment)
  147. return new_comments
  148. def has_valid_copyright(file_name: str, mime: str, is_on_ignore: bool, args: argparse.Namespace) -> Tuple[bool, bool]:
  149. """
  150. Detects if a file has a valid SPDX copyright notice.
  151. returns: Tuple[valid, modified]
  152. """
  153. detected_licenses = []
  154. detected_notices = []
  155. valid, modified = False, False
  156. with open(file_name, 'r') as f:
  157. code = f.read()
  158. comments = get_comments(code, mime)
  159. code_lines = code.splitlines()
  160. if not code_lines: # file is empty
  161. print(f'{TERMINAL_YELLOW}"{file_name}" is empty!{TERMINAL_RESET}')
  162. valid = True
  163. return valid, modified
  164. if args.replace:
  165. try:
  166. year, line = detect_old_header_style(file_name, comments, args)
  167. except NotFound as e:
  168. if args.verbose:
  169. print(f'{TERMINAL_GRAY}{e} in {file_name}{TERMINAL_RESET}')
  170. else:
  171. code_lines = replace_copyright(code_lines, year, line, mime, file_name)
  172. valid = True
  173. for comment in comments:
  174. if comment.line_number() > args.max_lines:
  175. break
  176. matches = re.search(r'SPDX-FileCopyrightText: ?(.*)', comment.text(), re.IGNORECASE)
  177. if matches:
  178. detected_notices.append((matches.group(1), comment.line_number()))
  179. try:
  180. year = extract_year_from_espressif_notice(matches.group(1))
  181. except NotFound as e:
  182. if args.verbose:
  183. print(f'{TERMINAL_GRAY}Not an {e.thing} {file_name}:{comment.line_number()}{TERMINAL_RESET}')
  184. else:
  185. template = NOTICE
  186. if comment.is_multiline():
  187. template = NOTICE_MULTILINE
  188. if mime == MIME['python']:
  189. template = PYTHON_NOTICE
  190. code_lines[comment.line_number() - 1] = template.format(years=format_years(year, file_name))
  191. matches = re.search(r'SPDX-License-Identifier: ?(.*)', comment.text(), re.IGNORECASE)
  192. if matches:
  193. detected_licenses.append((matches.group(1), comment.line_number()))
  194. if not is_on_ignore and not contains_any_copyright(comments, args):
  195. code_lines = insert_copyright(code_lines, file_name, mime)
  196. print(f'"{file_name}": inserted copyright notice - please check the content and run commit again!')
  197. valid = True
  198. new_code = '\n'.join(code_lines) + '\n'
  199. if code != new_code:
  200. with open(file_name, 'w') as f:
  201. f.write(new_code)
  202. modified = True
  203. if detected_licenses and detected_notices:
  204. if args.debug:
  205. print(f'{file_name} notices: {detected_notices}')
  206. print(f'{file_name} licenses: {detected_licenses}')
  207. valid = True
  208. return valid, modified
  209. def contains_any_copyright(comments: list, args: argparse.Namespace) -> bool:
  210. """
  211. Return True if any comment contain the word "copyright"
  212. """
  213. return any(
  214. comment.line_number() <= args.max_lines
  215. and re.search(r'copyright', comment.text(), re.IGNORECASE)
  216. for comment in comments
  217. )
  218. def insert_copyright(code_lines: list, file_name: str, mime: str) -> list:
  219. """
  220. Insert a copyright notice in the begining of a file, respecting a potencial shebang
  221. """
  222. new_code_lines = []
  223. # if first line contains a shebang, keep it first
  224. if code_lines[0].startswith('#!'):
  225. new_code_lines.append(code_lines[0])
  226. del code_lines[0]
  227. template = NEW_APACHE_HEADER
  228. if mime == MIME['python']:
  229. template = NEW_APACHE_HEADER_PYTHON
  230. new_code_lines.extend(template.format(years=format_years(0, file_name)).splitlines())
  231. new_code_lines.extend(code_lines)
  232. return new_code_lines
  233. def extract_year_from_espressif_notice(notice: str) -> int:
  234. """
  235. Extracts copyright year (creation date) from a Espressif copyright notice
  236. """
  237. matches = re.search(r'(\d{4})(?:-\d{4})? Espressif Systems', notice, re.IGNORECASE)
  238. if matches:
  239. return int(matches.group(1))
  240. raise NotFound('Espressif copyright notice')
  241. def replace_copyright(code_lines: list, year: int, line: int, mime: str, file_name: str) -> list:
  242. """
  243. Replaces old header style with new SPDX form.
  244. """
  245. # replace from line number (line) to line number (line + number of lines in the OLD HEADER)
  246. # with new header depending on file type
  247. end = line + OLD_APACHE_HEADER.count('\n')
  248. del code_lines[line - 1:end - 1]
  249. template = NEW_APACHE_HEADER
  250. if mime == MIME['python']:
  251. template = NEW_APACHE_HEADER_PYTHON
  252. code_lines[line - 1:line - 1] = template.format(years=format_years(year, file_name)).splitlines()
  253. print(f'{TERMINAL_GRAY}"{file_name}": replacing old header (lines: {line}-{end}) with new SPDX header style.{TERMINAL_RESET}')
  254. return code_lines
  255. def detect_old_header_style(file_name: str, comments: list, args: argparse.Namespace) -> Tuple[int, int]:
  256. """
  257. Detects old header style (Apache-2.0) and extracts the year and line number.
  258. returns: Tuple[year, comment line number]
  259. """
  260. comments_text = str()
  261. for comment in comments:
  262. if comment.line_number() > args.max_lines:
  263. break
  264. comments_text = f'{comments_text}\n{comment.text().strip()}'
  265. ratio = fuzz.partial_ratio(comments_text, OLD_APACHE_HEADER)
  266. if args.debug:
  267. print(f'{TERMINAL_GRAY}ratio for {file_name}: {ratio}{TERMINAL_RESET}')
  268. if ratio > args.fuzzy_ratio:
  269. for comment in comments:
  270. # only check up to line number MAX_LINES
  271. if comment.line_number() > args.max_lines:
  272. break
  273. try:
  274. year = extract_year_from_espressif_notice(comment.text())
  275. except NotFound:
  276. pass
  277. else:
  278. return (year, comment.line_number())
  279. raise NotFound('Old Espressif header')
  280. def format_years(past: int, file_name: str) -> str:
  281. """
  282. Function to format a year:
  283. - just current year -> output: [year]
  284. - some year in the past -> output: [past year]-[current year]
  285. """
  286. today = datetime.datetime.now().year
  287. if past == 0:
  288. # use the current year
  289. past = today
  290. if past == today:
  291. return str(past)
  292. if past > today or past < 1972:
  293. error_msg = f'{file_name}: invalid year in the copyright header detected. ' \
  294. + 'Check your system clock and the copyright header.'
  295. raise ValueError(error_msg)
  296. return '{past}-{today}'.format(past=past, today=today)
  297. def check_copyrights(args: argparse.Namespace) -> Tuple[List, List]:
  298. """
  299. Main logic and for loop
  300. returns:
  301. list of files with wrong headers
  302. list of files which were modified
  303. """
  304. wrong_header_files = []
  305. modified_files = []
  306. with open(IGNORE_LIST_FN, 'r') as f:
  307. ignore_list = [item.strip() for item in f.readlines()]
  308. updated_ignore_list = ignore_list.copy()
  309. with open(PERMANENT_IGNORE_LIST_FN) as f:
  310. permanent_ignore_list = [item.strip() for item in f.readlines()]
  311. for file_name in args.filenames:
  312. try:
  313. mime = get_file_mime(file_name)
  314. except UnsupportedFileType:
  315. print(f'{TERMINAL_GRAY}"{file_name}" is not of a supported type! Skipping.{TERMINAL_RESET}')
  316. continue
  317. if any(fnmatch.fnmatch(file_name, pattern) for pattern in permanent_ignore_list):
  318. print(f'{TERMINAL_YELLOW}"{file_name}" is ignored by a permanent pattern!{TERMINAL_RESET}')
  319. continue
  320. if file_name in ignore_list:
  321. if args.verbose:
  322. print(f'{TERMINAL_GRAY}"{file_name}" is on the ignore list.{TERMINAL_RESET}')
  323. valid, modified = has_valid_copyright(file_name, mime, True, args)
  324. if modified:
  325. modified_files.append(CustomFile(file_name, True))
  326. if valid:
  327. if args.dont_update_ignore_list:
  328. print(f'{TERMINAL_YELLOW}"{file_name}" now has a correct copyright header - remove it from the ignore list '
  329. f'or run this script without the --dont-update-ignore-list option to do this automatically!{TERMINAL_RESET}')
  330. else:
  331. updated_ignore_list.remove(file_name)
  332. else:
  333. wrong_header_files.append(CustomFile(file_name, True))
  334. else:
  335. valid, modified = has_valid_copyright(file_name, mime, False, args)
  336. if modified:
  337. modified_files.append(CustomFile(file_name, False))
  338. if not valid:
  339. wrong_header_files.append(CustomFile(file_name, False))
  340. if updated_ignore_list != ignore_list:
  341. with open(IGNORE_LIST_FN, 'w') as f:
  342. for item in updated_ignore_list:
  343. f.write(f'{item}\n')
  344. modified_files.append(CustomFile(IGNORE_LIST_FN, False))
  345. print(f'\n{TERMINAL_GREEN}Files removed from ignore list:{TERMINAL_RESET}')
  346. for file in ignore_list:
  347. if file not in updated_ignore_list:
  348. print(f' {file}')
  349. return wrong_header_files, modified_files
  350. def build_parser() -> argparse.ArgumentParser:
  351. parser = argparse.ArgumentParser(description='Check copyright headers')
  352. parser.add_argument('-v', '--verbose', action='store_true',
  353. help='print more information (useful for debugging)')
  354. parser.add_argument('-r', '--replace', action='store_true',
  355. help='tries to update copyright notices')
  356. parser.add_argument('-m', '--max-lines', type=int, default=30,
  357. help='how far to check for copyright notice in a file (default 30)')
  358. parser.add_argument('-f', '--fuzzy-ratio', type=int, default=95,
  359. help='minimum %% ratio to be considered as equal to the old header style (default 95)')
  360. parser.add_argument('-d', '--debug', action='store_true',
  361. help='print debug info')
  362. parser.add_argument('-du', '--dont-update-ignore-list', action='store_true')
  363. parser.add_argument('filenames', nargs='+', help='file(s) to check', metavar='file')
  364. return parser
  365. def main() -> None:
  366. args = build_parser().parse_args()
  367. if args.debug:
  368. print(f'{TERMINAL_GRAY}Running with args: {args}')
  369. print(f'Permanent ignore list: {PERMANENT_IGNORE_LIST_FN}')
  370. print(f'Ignore list: {IGNORE_LIST_FN}{TERMINAL_RESET}')
  371. wrong_header_files, modified_files = check_copyrights(args)
  372. if modified_files:
  373. print(f'\n{TERMINAL_YELLOW}Modified files:{TERMINAL_RESET}')
  374. for file in modified_files:
  375. print(file)
  376. print(CHECK_MODIFY_MESSAGE)
  377. abort_commit = bool(modified_files)
  378. if wrong_header_files:
  379. print(f'{TERMINAL_YELLOW}Information about this test{TERMINAL_RESET}')
  380. print(CHECK_FAIL_MESSAGE.format(example=NEW_APACHE_HEADER.format(years=datetime.datetime.now().year)))
  381. print(f'{TERMINAL_RED}Files which failed the copyright check:{TERMINAL_RESET}')
  382. for wrong_file in wrong_header_files:
  383. if not wrong_file.is_on_ignore_list:
  384. abort_commit = True
  385. print(wrong_file)
  386. num_files_processed = len(args.filenames)
  387. if abort_commit:
  388. num_files_modified = len(modified_files)
  389. num_files_wrong = len(wrong_header_files)
  390. print(f'{TERMINAL_YELLOW}Processed {num_files_processed} source file{"s"[:num_files_processed^1]},', end=' ')
  391. print(f'{num_files_modified} modified and {num_files_wrong} with invalid copyright.{TERMINAL_RESET}')
  392. sys.exit(1) # sys.exit(1) to abort the commit
  393. # pre-commit also automatically aborts a commit if files are modified on disk
  394. print(f'\n{TERMINAL_GREEN}Successfuly processed {num_files_processed} file{"s"[:num_files_processed^1]}.{TERMINAL_RESET}\n')
  395. if __name__ == '__main__':
  396. main()