external_content.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. """
  2. External content
  3. ################
  4. Copyright (c) 2021 Nordic Semiconductor ASA
  5. SPDX-License-Identifier: Apache-2.0
  6. Introduction
  7. ============
  8. This extension allows importing sources from directories outside the Sphinx
  9. source directory. They are copied to the source directory before starting the
  10. build. Note that the copy is *smart*, that is, only updated files are actually
  11. copied. Therefore, incremental builds detect changes correctly and behave as
  12. expected.
  13. Links to external content not included in the generated documentation are
  14. transformed to external links as needed.
  15. Configuration options
  16. =====================
  17. - ``external_content_contents``: A list of external contents. Each entry is
  18. a tuple with two fields: the external base directory and a file glob pattern.
  19. - ``external_content_link_prefixes``: A list of link prefixes out of scope.
  20. All links to content with these prefixes are made external.
  21. - ``external_content_link_extensions``: A list of file extensions in scope of
  22. the documentation. All links to content without these file extensions are
  23. made external.
  24. - ``external_content_keep``: A list of file globs (relative to the destination
  25. directory) that should be kept even if they do not exist in the source
  26. directory. This option can be useful for auto-generated files in the
  27. destination directory.
  28. """
  29. import filecmp
  30. import os
  31. import re
  32. import shutil
  33. import tempfile
  34. from pathlib import Path
  35. from typing import Any, Dict, List, Optional
  36. from sphinx.application import Sphinx
  37. __version__ = "0.1.0"
  38. DIRECTIVES = ("figure", "image", "include", "literalinclude")
  39. """Default directives for included content."""
  40. EXTERNAL_LINK_URL_PREFIX = (
  41. "https://github.com/project-chip/connectedhomeip/blob/master/"
  42. )
  43. def adjust_includes(
  44. fname: Path,
  45. basepath: Path,
  46. encoding: str,
  47. link_prefixes: List[str],
  48. extensions: List[str],
  49. targets: List[Path],
  50. dstpath: Optional[Path] = None,
  51. ) -> None:
  52. """Adjust included content paths.
  53. Args:
  54. fname: File to be processed.
  55. basepath: Base path to be used to resolve content location.
  56. encoding: Sources encoding.
  57. link_prefixes: Prefixes of links that are made external.
  58. extensions: Filename extensions links to which are not made external.
  59. targets: List of all files that are being copied.
  60. dstpath: Destination path for fname if its path is not the actual destination.
  61. """
  62. if fname.suffix != ".md":
  63. return
  64. dstpath = dstpath or fname.parent
  65. def _adjust_path(path):
  66. # ignore absolute paths, section links, hyperlinks and same folder
  67. if path.startswith(("/", "#", "http", "www")) or "/" not in path:
  68. return path
  69. # for files that are being copied modify reference to and out of /docs
  70. filepath = path.split("#")[0]
  71. absolute = (basepath / filepath).resolve()
  72. if absolute in targets:
  73. if "docs/" in path:
  74. path = path.replace("docs/", "")
  75. elif "../examples" in path:
  76. path = path.replace("../", "", 1)
  77. return path
  78. # otherwise change links to point to their targets' original location
  79. return Path(os.path.relpath(basepath / path, dstpath)).as_posix()
  80. def _adjust_links(m):
  81. displayed, fpath = m.groups()
  82. fpath_adj = _adjust_path(fpath)
  83. return f"[{displayed}]({fpath_adj})"
  84. def _adjust_external(m):
  85. displayed, target = m.groups()
  86. return f"[{displayed}]({EXTERNAL_LINK_URL_PREFIX}{target})"
  87. def _adjust_filetype(m):
  88. displayed, target, extension = m.groups()
  89. if extension.lower() in extensions or target.startswith("http"):
  90. return m.group(0)
  91. return f"[{displayed}]({EXTERNAL_LINK_URL_PREFIX}{target})"
  92. def _adjust_image_link(m):
  93. prefix, fpath, postfix = m.groups()
  94. fpath_adj = _adjust_path(fpath)
  95. return f"{prefix}{fpath_adj}{postfix}"
  96. rules = [
  97. # Find any links and adjust the path
  98. (r"\[([^\[\]]*)\]\s*\((.*)\)", _adjust_links),
  99. # Find links that lead to an external folder and transform it
  100. # into an external link.
  101. (
  102. r"\[([^\[\]]*)\]\s*\((?:\.\./)*((?:" + "|".join(link_prefixes) + r")[^)]*)\)",
  103. _adjust_external,
  104. ),
  105. # Find links that lead to a non-presentable filetype and transform
  106. # it into an external link.
  107. (
  108. r"\[([^\[\]]*)\]\s*\((?:\.\./)*((?:[^()]+?/)*[^.()]+?(\.[^)/#]+))(?:#[^)]+)?\)",
  109. _adjust_filetype,
  110. ),
  111. # Find links that lead to a folder and transform it into an external link.
  112. (
  113. r"\[([^\[\]]*)\]\s*\((?:\.\./)*((?:[^()]+?/)+[^).#/]+)(\))",
  114. _adjust_filetype,
  115. ),
  116. # Find image links in img tags and adjust them
  117. (r"(<img [^>]*src=[\"'])([^ >]+)([\"'][^>]*>)", _adjust_image_link)
  118. ]
  119. with open(fname, "r+", encoding=encoding) as f:
  120. content = f.read()
  121. modified = False
  122. for pattern, sub_func in rules:
  123. content, changes_made = re.subn(pattern, sub_func, content)
  124. modified = modified or changes_made
  125. if modified:
  126. f.seek(0)
  127. f.write(content)
  128. f.truncate()
  129. def sync_contents(app: Sphinx) -> None:
  130. """Synchronize external contents.
  131. Args:
  132. app: Sphinx application instance.
  133. """
  134. srcdir = Path(app.srcdir).resolve()
  135. to_copy = []
  136. to_delete = set(f for f in srcdir.glob("**/*") if not f.is_dir())
  137. to_keep = set(
  138. f
  139. for k in app.config.external_content_keep
  140. for f in srcdir.glob(k)
  141. if not f.is_dir()
  142. )
  143. for content in app.config.external_content_contents:
  144. prefix_src, glob = content
  145. for src in prefix_src.glob(glob):
  146. if src.is_dir():
  147. to_copy.extend(
  148. [(f, prefix_src) for f in src.glob("**/*") if not f.is_dir()]
  149. )
  150. else:
  151. to_copy.append((src, prefix_src))
  152. list_of_destinations = [f for f, _ in to_copy]
  153. for entry in to_copy:
  154. src, prefix_src = entry
  155. dst = (srcdir / src.relative_to(prefix_src)).resolve()
  156. if dst in to_delete:
  157. to_delete.remove(dst)
  158. if not dst.parent.exists():
  159. dst.parent.mkdir(parents=True)
  160. # just copy if it does not exist
  161. if not dst.exists():
  162. shutil.copy(src, dst)
  163. adjust_includes(
  164. dst,
  165. src.parent,
  166. app.config.source_encoding,
  167. app.config.external_content_link_prefixes,
  168. app.config.external_content_link_extensions,
  169. list_of_destinations,
  170. )
  171. # if origin file is modified only copy if different
  172. elif src.stat().st_mtime > dst.stat().st_mtime:
  173. with tempfile.TemporaryDirectory() as td:
  174. # adjust origin includes before comparing
  175. src_adjusted = Path(td) / src.name
  176. shutil.copy(src, src_adjusted)
  177. adjust_includes(
  178. src_adjusted,
  179. src.parent,
  180. app.config.source_encoding,
  181. app.config.external_content_link_prefixes,
  182. app.config.external_content_link_extensions,
  183. list_of_destinations,
  184. dstpath=dst.parent,
  185. )
  186. if not filecmp.cmp(src_adjusted, dst):
  187. dst.unlink()
  188. shutil.move(os.fspath(src_adjusted), os.fspath(dst))
  189. # remove any previously copied file not present in the origin folder,
  190. # excepting those marked to be kept.
  191. for file in to_delete - to_keep:
  192. file.unlink()
  193. def setup(app: Sphinx) -> Dict[str, Any]:
  194. app.add_config_value("external_content_contents", [], "env")
  195. app.add_config_value("external_content_keep", [], "")
  196. app.add_config_value("external_content_link_prefixes", [], "env")
  197. app.add_config_value("external_content_link_extensions", [], "env")
  198. app.connect("builder-inited", sync_contents)
  199. return {
  200. "version": __version__,
  201. "parallel_read_safe": True,
  202. "parallel_write_safe": True,
  203. }