#!/usr/bin/env python
#
# Checks that all links in the readme markdown files are valid
#
# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import concurrent.futures
import os
import os.path
import re
import sys
import urllib.error
import urllib.request
from collections import defaultdict, namedtuple
from pathlib import Path

EXCLUDE_DOCS_LIST = ['examples/peripherals/secure_element/atecc608_ecdsa/components/esp-cryptoauthlib/cryptoauthlib/**']

# The Apple app links are not accessible from the company network for some reason
EXCLUDE_URL_LIST = ['https://apps.apple.com/in/app/esp-ble-provisioning/id1473590141', 'https://apps.apple.com/in/app/esp-softap-provisioning/id1474040630']

# A link found in a markdown file: 'file' is the Path of the markdown file,
# 'url' is the link target (a relative path or a web URL)
Link = namedtuple('Link', ['file', 'url'])

class ReadmeLinkError(Exception):
    def __init__(self, file, url):
        self.file = file
        self.url = url


class RelativeLinkError(ReadmeLinkError):
    def __str__(self):
        return 'Relative link error, file - {} not found, linked from {}'.format(self.url, self.file)


class UrlLinkError(ReadmeLinkError):
    def __init__(self, file, url, error_code):
        self.error_code = error_code
        super().__init__(file, url)

    def __str__(self):
        # here 'file' holds the list of files which reference the broken URL
        files = [str(f) for f in self.file]
        return 'URL error, url - {} in files - {} is not accessible, request returned {}'.format(self.url, ', '.join(files), self.error_code)
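
# For illustration only (hypothetical paths and URL), the two errors render
# along these lines:
#   Relative link error, file - ../missing.md not found, linked from examples/foo/README.md
#   URL error, url - https://example.com/dead in files - examples/foo/README.md is not accessible, request returned HTTP Error 404: Not Found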

# We do not want the test to fail just because of bad network conditions;
# for non-404 errors we simply print a warning instead of raising
def check_url(url, files, timeout):
    try:
        with urllib.request.urlopen(url, timeout=timeout):
            return
    except urllib.error.HTTPError as e:
        if e.code == 404:
            raise UrlLinkError(files, url, str(e))
        else:
            print('Unable to access {}, err = {}'.format(url, str(e)))
    except Exception as e:
        print('Unable to access {}, err = {}'.format(url, str(e)))

def check_web_links(web_links):
    # web_links maps each unique URL to the files referencing it, so every URL
    # is fetched only once, by a pool of up to 10 worker threads
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        errors = []
        future_to_url = {executor.submit(check_url, url, files, timeout=30): (url, files) for url, files in web_links.items()}
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                future.result()
            except UrlLinkError as e:
                errors.append(e)
        return errors

def check_file_links(file_links):
    errors = []
    for link in file_links:
        # resolve the link relative to the directory of the file it appears in
        link_path = link.file.parent / link.url
        if not link_path.exists():
            errors.append(RelativeLinkError(link.file, link.url))
    print('Found {} errors with relative links'.format(len(errors)))
    return errors
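
# For illustration (hypothetical paths): a link '../common/README.md' inside
# examples/foo/README.md produces the path examples/foo/../common/README.md;
# the '/' operator does not normalize the '..' component, but exists() lets the
# filesystem resolve it, so the check passes if examples/common/README.md exists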

def get_md_links(folder):
    # group 1 captures the link target, group 2 splits off an optional '#anchor' suffix
    MD_LINK_RE = r'\[.+?\]\((.+?)(#.+)?\)'

    idf_path = Path(os.getenv('IDF_PATH'))
    links = []

    for path in (idf_path / folder).rglob('*.md'):
        if any(path.relative_to(idf_path).match(exclude_doc) for exclude_doc in EXCLUDE_DOCS_LIST):
            print('{} - excluded'.format(path))
            continue
        with path.open(encoding='utf8') as f:
            content = f.read()
        for url in re.findall(MD_LINK_RE, content):
            link = Link(path, url[0].lstrip())
            # Ignore "local" links (anchors within the same file)
            if not link.url.startswith('#'):
                links.append(link)
    return links
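
# A quick sketch of what MD_LINK_RE yields (illustrative input, not taken from
# the repository): re.findall() returns (target, anchor) tuples, e.g.
#   re.findall(MD_LINK_RE, 'see [setup](README.md#setup) and [docs](https://docs.espressif.com)')
#   -> [('README.md', '#setup'), ('https://docs.espressif.com', '')]
# Only url[0], the target with the anchor split off, is stored in the Link tuple.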

def check_readme_links(args):
    links = get_md_links('examples')
    print('Found {} links'.format(len(links)))

    errors = []
    web_links = defaultdict(list)
    file_links = []

    # Sort links into file and web links
    for link in links:
        if link.url.startswith('http'):
            web_links[link.url].append(link.file)
        else:
            file_links.append(link)

    # pop() instead of del: do not fail if an excluded URL is not referenced at all
    for url in EXCLUDE_URL_LIST:
        web_links.pop(url, None)

    errors.extend(check_file_links(file_links))
    if not args.skip_weburl:
        errors.extend(check_web_links(web_links))

    print('Found {} errors:'.format(len(errors)))
    for e in errors:
        print(e)

    return 1 if len(errors) > 0 else 0

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='check_readme_links.py: Checks for dead links in example READMEs', prog='check_readme_links.py')
    parser.add_argument('--skip-weburl', '-w', action='store_true', help='Skip checking of web URLs, only check links to local files')
    args = parser.parse_args()
    sys.exit(check_readme_links(args))
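
# Example invocations (IDF_PATH must point to the ESP-IDF checkout; the exit
# code is non-zero when broken links are found):
#   python check_readme_links.py        # check relative links and web URLs
#   python check_readme_links.py -w     # skip web URLs, check only local files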