check_readme_links.py

#!/usr/bin/env python
#
# Checks that all links in the readme markdown files are valid
#
# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import concurrent.futures
import os
import os.path
import re
import urllib.error
import urllib.request
from collections import defaultdict, namedtuple
from pathlib import Path

EXCLUDE_DOCS_LIST = ['examples/peripherals/secure_element/atecc608_ecdsa/components/esp-cryptoauthlib/cryptoauthlib/**']

# The Apple App Store links are, for some reason, not accessible from the company network
EXCLUDE_URL_LIST = ['https://apps.apple.com/in/app/esp-ble-provisioning/id1473590141',
                    'https://apps.apple.com/in/app/esp-softap-provisioning/id1474040630']
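
# Each link is recorded together with the markdown file it was found in, so
# error messages can point back at the offending README.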
Link = namedtuple('Link', ['file', 'url'])


class ReadmeLinkError(Exception):
    def __init__(self, file, url):
        self.file = file
        self.url = url


class RelativeLinkError(ReadmeLinkError):
    def __str__(self):
        return 'Relative link error, file - {} not found, linked from {}'.format(self.url, self.file)


class UrlLinkError(ReadmeLinkError):
    def __init__(self, file, url, error_code):
        self.error_code = error_code
        super().__init__(file, url)

    def __str__(self):
        files = [str(f) for f in self.file]
        return 'URL error, url - {} in files - {} is not accessible, request returned {}'.format(self.url, ', '.join(files), self.error_code)


# We do not want the test to fail just because of bad network conditions; for
# non-404 errors we simply print a warning.
def check_url(url, files, timeout):
    try:
        with urllib.request.urlopen(url, timeout=timeout):
            return
    except urllib.error.HTTPError as e:
        if e.code == 404:
            raise UrlLinkError(files, url, str(e))
        else:
            print('Unable to access {}, err = {}'.format(url, str(e)))
    except Exception as e:
        print('Unable to access {}, err = {}'.format(url, str(e)))
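

# check_web_links fans the URL checks out over a small thread pool; check_url
# raises UrlLinkError only for hard 404s, and those are the only web failures
# collected as errors.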
def check_web_links(web_links):
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        errors = []
        future_to_url = {executor.submit(check_url, url, files, timeout=30): (url, files) for url, files in web_links.items()}
        for future in concurrent.futures.as_completed(future_to_url):
            try:
                future.result()
            except UrlLinkError as e:
                errors.append(e)
        return errors


def check_file_links(file_links):
    errors = []
    for link in file_links:
        link_path = link.file.parent / link.url
        if not link_path.exists():
            errors.append(RelativeLinkError(link.file, link.url))
    print('Found {} errors with relative links'.format(len(errors)))
    return errors
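

# get_md_links walks every *.md file under the given folder (relative to the
# IDF_PATH environment variable, which is expected to point at the ESP-IDF
# checkout) and extracts markdown link targets; pure '#fragment' links are
# ignored.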
def get_md_links(folder):
    MD_LINK_RE = r'\[.+?\]\((.+?)(#.+)?\)'

    idf_path = Path(os.getenv('IDF_PATH'))
    links = []

    for path in (idf_path / folder).rglob('*.md'):
        if any(path.relative_to(idf_path).match(exclude_doc) for exclude_doc in EXCLUDE_DOCS_LIST):
            print('{} - excluded'.format(path))
            continue
        with path.open(encoding='utf8') as f:
            content = f.read()
        for url in re.findall(MD_LINK_RE, content):
            link = Link(path, url[0].lstrip())
            # Ignore "local" links
            if not link.url.startswith('#'):
                links.append(link)
    return links
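

# check_readme_links drives the whole check: it collects the links, splits
# them into web URLs and relative file links, drops the URLs on
# EXCLUDE_URL_LIST (which are therefore expected to appear in the READMEs),
# checks both groups and raises if anything is broken.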
def check_readme_links(args):
    links = get_md_links('examples')
    print('Found {} links'.format(len(links)))

    errors = []
    web_links = defaultdict(list)
    file_links = []

    # Sort links into file and web links
    for link in links:
        if link.url.startswith('http'):
            web_links[link.url].append(link.file)
        else:
            file_links.append(link)

    for url in EXCLUDE_URL_LIST:
        del web_links[url]

    errors.extend(check_file_links(file_links))

    if not args.skip_weburl:
        errors.extend(check_web_links(web_links))

    print('Found {} errors:'.format(len(errors)))
    for e in errors:
        print(e)
    if errors:
        # Re-raise the last error so the calling test fails
        raise errors[-1]


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='check_readme_links.py: Checks for dead links in example READMEs', prog='check_readme_links.py')
    parser.add_argument('--skip-weburl', '-w', action='store_true', help='Skip checking of web URLs, only check links to local files')
    args = parser.parse_args()
    check_readme_links(args)
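
# Usage sketch (assumes IDF_PATH points at the root of an ESP-IDF checkout;
# the path below is illustrative):
#
#   export IDF_PATH=~/esp/esp-idf
#   python check_readme_links.py        # check relative links and web URLs
#   python check_readme_links.py -w     # check relative links only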