| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603 |
- #!/usr/bin/env python
- # SPDX-FileCopyrightText: 2021 Espressif Systems (Shanghai) CO LTD
- # SPDX-License-Identifier: Apache-2.0
"""
Check files for copyright headers:
- file not on ignore list:
    - old Espressif copyright -> replace with SPDX
    - SPDX with invalid year or old company name -> replace with valid SPDX
    - other SPDX copyright -> PASS
    - non-SPDX copyright -> FAIL
    - no copyright -> insert Espressif copyright
- file on ignore list:
    - old Espressif copyright -> replace with SPDX, remove from ignore list
    - SPDX with invalid year or company format -> replace with valid SPDX and remove from ignore list
    - anything else -> keep on ignore list
"""
- import argparse
- import ast
- import configparser
- import datetime
- import os
- import re
- import sys
- import textwrap
- from typing import List, Tuple
- import pathspec
- import yaml
- # importing the whole comment_parser causes a crash when running inside of gitbash environment on Windows.
- from comment_parser.parsers import c_parser, python_parser
- from comment_parser.parsers.common import Comment
- from thefuzz import fuzz
# Root of the ESP-IDF checkout; falls back to the current working directory when IDF_PATH is not set.
IDF_PATH = os.getenv('IDF_PATH', os.getcwd())
# Both helper files live in tools/ci. The path components are passed separately so that
# os.path.join uses the platform separator throughout (no mixed separators on Windows).
IGNORE_LIST_FN = os.path.join(IDF_PATH, 'tools', 'ci', 'check_copyright_ignore.txt')
CONFIG_FN = os.path.join(IDF_PATH, 'tools', 'ci', 'check_copyright_config.yaml')
# Explanation printed before the list of files which failed the check.
CHECK_FAIL_MESSAGE = textwrap.dedent('''\
    To make a file pass the test, it needs to contain both:
    an SPDX-FileCopyrightText and an SPDX-License-Identifier with an allowed license for the section.
    More information about SPDX license identifiers can be found here:
    https://spdx.github.io/spdx-spec/appendix-V-using-SPDX-short-identifiers-in-source-files/
    To have this hook automatically insert the standard Espressif copyright notice,
    ensure the word "copyright" is not in any comment up to line 30 and the file is not on the ignore list.
    Below is a list of files, which failed the copyright check.
    ''')

# Explanation printed after the list of files modified by this script.
CHECK_MODIFY_MESSAGE = textwrap.dedent('''\
    Above is a list of files, which were modified. Please check their contents, stage them and run the commit again!
    Files prefixed with "(ignore)" were on the ignore list at the time of invoking this script.
    They may have been removed if noted above.
    ''')

# Pointer to the contribution guide, printed at the end of every run.
CHECK_FOOTER_MESSAGE = textwrap.dedent('''\
    Additional information about this hook and copyright headers may be found here:
    https://docs.espressif.com/projects/esp-idf/en/latest/esp32/contribute/copyright-guide.html
    ''')

# Reference text of the legacy Apache-2.0 header. Comments found in a file are fuzzy-matched
# against it, and its line count decides how many lines get replaced by the SPDX header.
OLD_APACHE_HEADER = textwrap.dedent('''\
    Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
    ''')
# SPDX headers which replace the legacy Apache-2.0 header.
# "{years}" is filled in with the output of format_years().
NEW_APACHE_HEADER_PYTHON = textwrap.dedent('''\
    # SPDX-FileCopyrightText: {years} Espressif Systems (Shanghai) CO LTD
    # SPDX-License-Identifier: Apache-2.0
    ''')

# The continuation lines start with " *" so that the generated block comment has the same
# shape as the ' * SPDX-...' lines this script writes when it updates an existing notice.
NEW_APACHE_HEADER = textwrap.dedent('''\
    /*
     * SPDX-FileCopyrightText: {years} Espressif Systems (Shanghai) CO LTD
     *
     * SPDX-License-Identifier: Apache-2.0
     */
    ''')
# Short file-type name used inside this script -> MIME type string
MIME = dict(
    python='text/x-python',
    c='text/x-c',
    cpp='text/x-c++',
)
# MIME type string -> comment_parser module used to extract the comments.
# C and C++ sources share the C parser.
MIME_PARSER = {
    **dict.fromkeys(('text/x-c', 'text/x-c++'), c_parser),
    'text/x-python': python_parser,
}
# ANSI escape sequences used to colour the terminal output
TERMINAL_RESET = '\x1b[0m'
TERMINAL_BOLD = '\x1b[1m'
TERMINAL_YELLOW = '\x1b[93m'
TERMINAL_GREEN = '\x1b[92m'
TERMINAL_RED = '\x1b[91m'
TERMINAL_GRAY = '\x1b[90m'
class UnsupportedFileType(Exception):
    """Exception raised for unsupported file types.

    Attributes:
        file_name -- input file which caused the error
        message -- explanation of the error
    """

    def __init__(self, file_name: str, message: str = 'this file type is not supported') -> None:
        self.file_name = file_name
        self.message = message
        super().__init__(self.message)

    # The attribute used to be stored under the misspelled name "fine_name".
    # The alias keeps any code relying on the old spelling working.
    @property
    def fine_name(self) -> str:
        return self.file_name

    @fine_name.setter
    def fine_name(self, value: str) -> None:
        self.file_name = value

    def __str__(self) -> str:
        return f'{self.file_name}: {self.message}'
class NotFound(Exception):
    """Signals that a searched-for item is missing.

    Attributes:
        thing -- description of what could not be found
    """

    def __init__(self, thing: str = 'something') -> None:
        super().__init__(thing)
        self.thing = thing

    def __str__(self) -> str:
        return '{} was not found'.format(self.thing)
class CustomFile:
    """
    Pairs a file name with the information whether the file is on the ignore list,
    so that both can be printed together.
    """

    def __init__(self, file_name: str, is_on_ignore_list: bool) -> None:
        self.is_on_ignore_list = is_on_ignore_list
        self.file_name = file_name

    def __str__(self) -> str:
        # files from the ignore list are marked, all others only get the separator
        marker = '(ignore) ' if self.is_on_ignore_list else ' '
        return f'{marker}{self.file_name}'
class CommentHolder(Comment):
    """
    Hold the comment, its line number and when it is multiline,
    also store if it's the first in a comment block
    """

    def __init__(self, text: str, line_number: int, multiline: bool = False, first_in_multiline: bool = False):
        """
        Args:
            text: String text of comment.
            line_number: Line number (int) comment was found on.
            multiline: bool is it multiline
            first_in_multiline: bool if multiline, is it first in that comment block
        """
        # Zero-argument super() is bound to this class. super(self.__class__, self) would
        # resolve to the subclass itself and recurse forever if this class got subclassed.
        super().__init__(text, line_number, multiline)
        # a comment can only be "first in multiline" when it is multiline at all
        self._first_in_multiline = first_in_multiline and multiline

    def is_first_in_multiline(self) -> bool:
        """
        Returns whether this comment was a first in a multiline comment.
        """
        return self._first_in_multiline
def get_file_mime(fn: str) -> str:
    """
    Map a file name to a MIME type by looking at its extension.

    Raises UnsupportedFileType when the extension is not one of the handled ones.
    """
    extension_groups = (
        (('.py',), 'python'),
        (('.cpp', '.hpp'), 'cpp'),
        (('.c', '.h', '.ld'), 'c'),
    )
    for extensions, file_type in extension_groups:
        if fn.endswith(extensions):
            return MIME[file_type]
    raise UnsupportedFileType(fn)
def get_comments(code: str, mime: str) -> list:
    """
    Extract the comments of the given source code with the parser registered for the MIME type.

    A multiline comment is split into one CommentHolder per line. Each of them carries
    its own line number and only the one on the comment's starting line is flagged as first.
    """
    extracted = MIME_PARSER[mime].extract_comments(code)
    holders = []
    for found in extracted:
        if not found.is_multiline():
            holders.append(CommentHolder(found.text(), found.line_number()))
            continue
        first_line = found.line_number()
        holders.extend(
            CommentHolder(text, number, True, number == first_line)
            for number, text in enumerate(found.text().splitlines(), start=first_line)
        )
    return holders
def _spdx_line_template(tag: str, comment: 'CommentHolder', mime: str, config_section: configparser.SectionProxy) -> str:
    """
    Build the template of the line which replaces an Espressif SPDX tag line.

    The comment prefix is '# ' for Python files. For other files it is '// ' for a single-line
    comment, '/* ' for the first line of a multiline comment and ' * ' for its other lines.
    The prefix and tag are followed by config_section['espressif_copyright'],
    which the caller formats with "years".
    """
    if mime == MIME['python']:
        prefix = '# '
    elif not comment.is_multiline():
        prefix = '// '
    elif comment.is_first_in_multiline():
        prefix = '/* '
    else:
        prefix = ' * '
    return f'{prefix}{tag}: ' + config_section['espressif_copyright']


def has_valid_copyright(file_name: str, mime: str, is_on_ignore: bool, config_section: configparser.SectionProxy,
                        args: argparse.Namespace) -> Tuple[bool, bool]:
    """
    Detects if a file has a valid SPDX copyright notice.

    Depending on the arguments the file may be rewritten: an old Apache-2.0 header is replaced
    (args.replace), Espressif SPDX notices get their years refreshed and a notice is inserted
    when the file is not on the ignore list and no comment mentions a copyright.

    returns: Tuple[valid, modified]
    """
    detected_licenses = []
    detected_notices = []
    # collected like the notices, but not evaluated any further in this function
    detected_contributors = []

    valid, modified = False, False

    # Explicit encoding, so the result does not depend on the locale of the machine.
    with open(file_name, 'r', encoding='utf-8') as f:
        code = f.read()
    comments = get_comments(code, mime)
    code_lines = code.splitlines()
    if not code_lines:  # file is empty
        print(f'{TERMINAL_YELLOW}"{file_name}" is empty!{TERMINAL_RESET}')
        valid = True
        return valid, modified

    if args.replace:
        try:
            year, line = detect_old_header_style(file_name, comments, args)
        except NotFound as e:
            if args.debug:
                print(f'{TERMINAL_GRAY}{e} in {file_name}{TERMINAL_RESET}')
        else:
            # NOTE(review): "comments" was extracted before this replacement, so the line
            # numbers used below refer to the original content of the file.
            code_lines = replace_copyright(code_lines, year, line, mime, file_name)
            valid = True

    for comment in comments:
        if comment.line_number() > args.max_lines:
            break

        matches = re.search(r'SPDX-FileCopyrightText: ?(.*)', comment.text(), re.IGNORECASE)
        if matches:
            detected_notices.append((matches.group(1), comment.line_number()))
            try:
                year = extract_year_from_espressif_notice(matches.group(1))
            except NotFound as e:
                if args.verbose:
                    print(f'{TERMINAL_GRAY}Not an {e.thing} {file_name}:{comment.line_number()}{TERMINAL_RESET}')
            else:
                template = _spdx_line_template('SPDX-FileCopyrightText', comment, mime, config_section)
                code_lines[comment.line_number() - 1] = template.format(years=format_years(year, file_name))

        matches = re.search(r'SPDX-FileContributor: ?(.*)', comment.text(), re.IGNORECASE)
        if matches:
            detected_contributors.append((matches.group(1), comment.line_number()))
            try:
                year = extract_year_from_espressif_notice(matches.group(1))
            except NotFound as e:
                if args.debug:
                    print(f'{TERMINAL_GRAY}Not an {e.thing} {file_name}:{comment.line_number()}{TERMINAL_RESET}')
            else:
                template = _spdx_line_template('SPDX-FileContributor', comment, mime, config_section)
                code_lines[comment.line_number() - 1] = template.format(years=format_years(year, file_name))

        matches = re.search(r'SPDX-License-Identifier: ?(.*)', comment.text(), re.IGNORECASE)
        if matches:
            detected_licenses.append((matches.group(1), comment.line_number()))

    if not is_on_ignore and not contains_any_copyright(comments, args):
        code_lines = insert_copyright(code_lines, file_name, mime, config_section)
        print(f'"{file_name}": inserted copyright notice - please check the content and run commit again!')
        valid = True

    new_code = '\n'.join(code_lines) + '\n'
    if code != new_code:
        with open(file_name, 'w', encoding='utf-8') as f:
            f.write(new_code)
        modified = True

    if detected_licenses and detected_notices:
        valid = True

    if args.debug:
        print(f'{file_name} notices: {detected_notices}')
        print(f'{file_name} licenses: {detected_licenses}')

    if detected_licenses:
        # configparser stores all values as strings; parse the list once for all licenses
        allowed_licenses = ast.literal_eval(config_section['allowed_licenses'])
        for detected_license, line_number in detected_licenses:
            if not allowed_license_combination(detected_license, allowed_licenses):
                valid = False
                # reset the colour, otherwise everything printed afterwards stays red
                print(f'{TERMINAL_RED}{file_name}:{line_number} License "{detected_license}" is not allowed! '
                      f'Allowed licenses: {allowed_licenses}.{TERMINAL_RESET}')
    return valid, modified
def contains_any_copyright(comments: list, args: argparse.Namespace) -> bool:
    """
    Tell whether the word "copyright" (in any letter case) occurs in a comment
    located on a line not greater than args.max_lines.
    """
    for candidate in comments:
        if candidate.line_number() > args.max_lines:
            continue
        if re.search(r'copyright', candidate.text(), re.IGNORECASE):
            return True
    return False
def insert_copyright(code_lines: list, file_name: str, mime: str, config_section: configparser.SectionProxy) -> list:
    """
    Insert a copyright notice in the beginning of a file, respecting a potential shebang

    The notice template is config_section['new_notice_python'] for Python files and
    config_section['new_notice_c'] otherwise. It is formatted with the license from
    config_section['license_for_new_files'] and the current year.

    returns: new list of lines; the list passed in is left untouched
    """
    new_code_lines = []
    remaining_lines = code_lines

    # if first line contains a shebang, keep it first
    if code_lines and code_lines[0].startswith('#!'):
        new_code_lines.append(code_lines[0])
        # slice instead of "del", so the caller's list is not modified
        remaining_lines = code_lines[1:]

    template_key = 'new_notice_python' if mime == MIME['python'] else 'new_notice_c'
    notice = config_section[template_key].format(license=config_section['license_for_new_files'],
                                                 years=format_years(0, file_name))
    new_code_lines.extend(notice.splitlines())
    new_code_lines.extend(remaining_lines)
    return new_code_lines
def extract_year_from_espressif_notice(notice: str) -> int:
    """
    Return the first year of an Espressif copyright notice.

    The notice has to contain "<year> Espressif Systems" or "<year>-<year> Espressif Systems"
    (letter case is ignored), otherwise NotFound is raised.
    """
    found = re.search(r'(\d{4})(?:-\d{4})? Espressif Systems', notice, re.IGNORECASE)
    if found is None:
        raise NotFound('Espressif copyright notice')
    return int(found.group(1))
def replace_copyright(code_lines: list, year: int, line: int, mime: str, file_name: str) -> list:
    """
    Swap the old header, starting at the (1-based) line number "line", for the new SPDX header.

    As many lines as OLD_APACHE_HEADER has are removed. The list is modified in place and also returned.
    """
    first_index = line - 1
    end = line + OLD_APACHE_HEADER.count('\n')
    del code_lines[first_index:end - 1]

    # pick the header matching the comment syntax of the file type
    template = NEW_APACHE_HEADER_PYTHON if mime == MIME['python'] else NEW_APACHE_HEADER
    new_header_lines = template.format(years=format_years(year, file_name)).splitlines()
    code_lines[first_index:first_index] = new_header_lines

    # NOTE(review): the removed lines are line..end-1, the message reports "end" as the upper bound
    print(f'{TERMINAL_BOLD}"{file_name}": replacing old Apache-2.0 header (lines: {line}-{end}) with the new SPDX header.{TERMINAL_RESET}')
    return code_lines
def detect_old_header_style(file_name: str, comments: list, args: argparse.Namespace) -> Tuple[int, int]:
    """
    Look for the old (Apache-2.0) header in the comments up to line args.max_lines.

    The stripped comment texts are compared with OLD_APACHE_HEADER by fuzzy matching. When the
    ratio exceeds args.fuzzy_ratio, the first comment with an Espressif notice provides the result.

    returns: Tuple[year, comment line number]
    raises: NotFound if the ratio is too low or no comment contains an Espressif notice
    """
    # comments are processed only until the first one located past max_lines
    leading_comments = []
    for comment in comments:
        if comment.line_number() > args.max_lines:
            break
        leading_comments.append(comment)

    comments_text = ''.join(f'\n{comment.text().strip()}' for comment in leading_comments)
    ratio = fuzz.partial_ratio(comments_text, OLD_APACHE_HEADER)
    if args.debug:
        print(f'{TERMINAL_GRAY}ratio for {file_name}: {ratio}{TERMINAL_RESET}')

    if ratio <= args.fuzzy_ratio:
        raise NotFound('Old Espressif header')

    for comment in leading_comments:
        try:
            year = extract_year_from_espressif_notice(comment.text())
        except NotFound:
            continue
        return (year, comment.line_number())

    raise NotFound('Old Espressif header')
def format_years(past: int, file_name: str) -> str:
    """
    Format the year part of a copyright notice:
    - 0 or the current year -> output: [current year]
    - some year in the past -> output: [past year]-[current year]

    raises: ValueError for a year in the future or before 1972 (file_name is part of the message)
    """
    today = datetime.datetime.now().year
    # zero stands for "use the current year"
    first_year = today if past == 0 else past

    if first_year == today:
        return str(first_year)

    if not 1972 <= first_year <= today:
        error_msg = f'{file_name}: invalid year in the copyright header detected. ' \
            + 'Check your system clock and the copyright header.'
        raise ValueError(error_msg)

    return f'{first_year}-{today}'
def check_copyrights(args: argparse.Namespace, config: configparser.ConfigParser) -> Tuple[List, List]:
    """
    Main logic and for loop

    Every file from args.filenames of a supported type is checked with the config section
    whose "include" patterns match it (the last matching section wins, otherwise "DEFAULT").
    Files from the ignore list which became valid are removed from the ignore list file,
    unless args.dont_update_ignore_list is set.

    returns:
        list of files with wrong headers
        list of files which were modified
    """
    wrong_header_files = []
    modified_files = []
    pathspecs = {}

    # Explicit encoding, so the result does not depend on the locale of the machine.
    with open(IGNORE_LIST_FN, 'r', encoding='utf-8') as f:
        ignore_list = [item.strip() for item in f.readlines()]
    updated_ignore_list = ignore_list.copy()
    # set for constant-time membership tests inside the loop over the files
    ignored_files = set(ignore_list)

    # compile the file patterns
    for section in config.sections():
        # configparser stores all values as strings
        patterns = ast.literal_eval(config[section]['include'])
        try:
            pathspecs[section] = pathspec.PathSpec.from_lines('gitwildmatch', patterns)
        except TypeError:
            print(f'Error while compiling file patterns. Section {section} has invalid include option. Must be a list of file patterns.')
            sys.exit(1)

    for file_name in args.filenames:
        try:
            mime = get_file_mime(file_name)
        except UnsupportedFileType:
            print(f'{TERMINAL_GRAY}"{file_name}" is not of a supported type! Skipping.{TERMINAL_RESET}')
            continue

        matched_section = 'DEFAULT'
        for section in config.sections():
            if pathspecs[section].match_file(file_name):
                if args.debug:
                    print(f'{TERMINAL_GRAY}{file_name} matched {section}{TERMINAL_RESET}')
                matched_section = section

        if config[matched_section]['perform_check'] == 'False':  # configparser stores all values as strings
            print(f'{TERMINAL_GRAY}"{file_name}" is using config section "{matched_section}" which does not perform the check! Skipping.{TERMINAL_RESET}')
            continue

        is_on_ignore = file_name in ignored_files
        if is_on_ignore and args.verbose:
            print(f'{TERMINAL_GRAY}"{file_name}" is on the ignore list.{TERMINAL_RESET}')

        valid, modified = has_valid_copyright(file_name, mime, is_on_ignore, config[matched_section], args)
        if modified:
            modified_files.append(CustomFile(file_name, is_on_ignore))
        if not valid:
            wrong_header_files.append(CustomFile(file_name, is_on_ignore))
        elif is_on_ignore:
            if args.dont_update_ignore_list:
                print(f'{TERMINAL_YELLOW}"{file_name}" now has a correct copyright header - remove it from the ignore list '
                      f'or run this script without the --dont-update-ignore-list option to do this automatically!{TERMINAL_RESET}')
            else:
                # Filtering (instead of list.remove) cannot fail when the same file is given
                # more than once and it also drops duplicated entries of the ignore list.
                updated_ignore_list = [item for item in updated_ignore_list if item != file_name]

    if updated_ignore_list != ignore_list:
        with open(IGNORE_LIST_FN, 'w', encoding='utf-8') as f:
            for item in updated_ignore_list:
                f.write(f'{item}\n')
        modified_files.append(CustomFile(IGNORE_LIST_FN, False))
        print(f'\n{TERMINAL_GREEN}Files removed from ignore list:{TERMINAL_RESET}')
        still_ignored = set(updated_ignore_list)
        for file in ignore_list:
            if file not in still_ignored:
                print(f' {file}')
    return wrong_header_files, modified_files
def build_parser() -> argparse.ArgumentParser:
    """
    Create the command line parser of this script.

    Besides the options it takes one or more positional file names ("filenames").
    """
    # (option strings, keyword arguments) in the order in which they are registered
    options = (
        (('-v', '--verbose'), dict(action='store_true',
                                   help='print more information (useful for debugging)')),
        (('-r', '--replace'), dict(action='store_true',
                                   help='tries to update copyright notices')),
        (('-m', '--max-lines'), dict(type=int, default=30,
                                     help='how far to check for copyright notice in a file (default 30)')),
        (('-f', '--fuzzy-ratio'), dict(type=int, default=95,
                                       help='minimum %% ratio to be considered as equal to the old header style (default 95)')),
        (('-d', '--debug'), dict(action='store_true',
                                 help='print debug info')),
        (('-du', '--dont-update-ignore-list'), dict(action='store_true')),
        (('filenames',), dict(nargs='+', help='file(s) to check', metavar='file')),
    )
    parser = argparse.ArgumentParser(description='Check copyright headers')
    for option_strings, settings in options:
        parser.add_argument(*option_strings, **settings)
    return parser
def debug_output(args: argparse.Namespace, config: configparser.ConfigParser) -> None:
    """
    Print the arguments, the paths of the config file and the ignore list
    and all options of every config section (the default section included).
    """
    print(f'{TERMINAL_GRAY}Running with args: {args}')
    print(f'Config file: {CONFIG_FN}')
    print(f'Ignore list: {IGNORE_LIST_FN}{TERMINAL_RESET}')
    print(f'Sections: {config.sections()}')
    for section, options in config.items():
        print(f'section: "{section}"')
        for key, value in options.items():
            print(f' {key}: "{value}"')
def allowed_license_combination(license_to_match: str, all_licenses: List[str]) -> bool:
    """
    Licenses can be combined together with the OR keyword. Therefore, a simple "in" lookup in a list is not enough.
    For example, if "A" and "B" are supported then "A OR B" and "B OR A" should be supported as well.

    An expression without any license name (empty, blank or just "OR") is not allowed.
    """
    if license_to_match in all_licenses:
        # This is the simple case, for example, when "A" is used from the list ["A", "B"]
        return True

    # for example, if license_to_match is "A OR B" then the following split will be ["A", "B"]
    split_list = [sp for sp in map(str.strip, license_to_match.split(' OR ')) if sp]

    # all() is True for an empty list, so an expression naming no license has to be rejected explicitly
    if not split_list:
        return False

    # for example, "A" and "B" needs to be in the supported list in order to match "A OR B".
    return all(i in all_licenses for i in split_list)
def verify_config(config: configparser.ConfigParser) -> None:
    """
    Validate the configuration and exit with status 1 if a problem was found.

    Checked are:
    - every section (the default one included): the license for new files is an allowed license
    - every section except the default one: the "include" option is set
    """
    config_ok = True

    for name, options in config.items():
        new_file_license = options['license_for_new_files']
        # configparser stores all values as strings
        permitted = ast.literal_eval(options['allowed_licenses'])
        if allowed_license_combination(new_file_license, permitted):
            continue
        print(f'Invalid config, section "{name}":\nDefault license for new files '
              f'({new_file_license}) is not on the allowed licenses list {permitted}.')
        config_ok = False

    for name in config.sections():
        if 'include' in config[name]:
            continue
        print(f'Invalid config, section "{name}":\nSection does not have the "include" option set.')
        config_ok = False

    if not config_ok:
        sys.exit(1)
def main() -> None:
    """
    Entry point: parse the arguments, load and verify the config, check the files and print a summary.

    Exits with status 1 when a file was modified or when a file which is not on the
    ignore list has an invalid copyright header.
    """
    args = build_parser().parse_args()
    config = configparser.ConfigParser()
    # Explicit encoding, so reading the config does not depend on the locale of the machine.
    with open(CONFIG_FN, 'r', encoding='utf-8') as f:
        yaml_dict = yaml.safe_load(f)
    config.read_dict(yaml_dict)

    if args.debug:
        debug_output(args, config)

    verify_config(config)

    wrong_header_files, modified_files = check_copyrights(args, config)
    abort_commit = bool(modified_files)
    num_files_wrong = 0

    if wrong_header_files:
        print(f'{TERMINAL_YELLOW}Information about this test{TERMINAL_RESET}')
        print(CHECK_FAIL_MESSAGE.format())
        print(f'{TERMINAL_YELLOW}Files which failed the copyright check:{TERMINAL_RESET}')
        for wrong_file in wrong_header_files:
            # files on the ignore list are reported, but they do not abort the commit
            if not wrong_file.is_on_ignore_list:
                abort_commit = True
                num_files_wrong += 1
            print(wrong_file)

    if modified_files:
        print(f'\n{TERMINAL_YELLOW}Modified files:{TERMINAL_RESET}')
        for file in modified_files:
            print(file)
        print(CHECK_MODIFY_MESSAGE)

    num_files_processed = len(args.filenames)
    # "file" for exactly one file, "files" otherwise
    plural = '' if num_files_processed == 1 else 's'
    print(CHECK_FOOTER_MESSAGE)

    if abort_commit:
        num_files_modified = len(modified_files)
        print(f'{TERMINAL_RED}Processed {num_files_processed} source file{plural},', end=' ')
        print(f'{num_files_modified} were modified and {num_files_wrong} have an invalid copyright (excluding ones on the ignore list).{TERMINAL_RESET}')
        sys.exit(1)  # sys.exit(1) to abort the commit

    # pre-commit also automatically aborts a commit if files are modified on disk
    print(f'{TERMINAL_GREEN}Successfully processed {num_files_processed} file{plural}.{TERMINAL_RESET}')


if __name__ == '__main__':
    main()
|