glob.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. """Filename globbing utility."""
  2. import os
  3. import re
  4. import fnmatch
  5. __all__ = ["glob", "iglob", "escape"]
  6. def glob(pathname, *, recursive=False):
  7. """Return a list of paths matching a pathname pattern.
  8. The pattern may contain simple shell-style wildcards a la
  9. fnmatch. However, unlike fnmatch, filenames starting with a
  10. dot are special cases that are not matched by '*' and '?'
  11. patterns.
  12. If recursive is true, the pattern '**' will match any files and
  13. zero or more directories and subdirectories.
  14. """
  15. return list(iglob(pathname, recursive=recursive))
  16. def iglob(pathname, *, recursive=False):
  17. """Return an iterator which yields the paths matching a pathname pattern.
  18. The pattern may contain simple shell-style wildcards a la
  19. fnmatch. However, unlike fnmatch, filenames starting with a
  20. dot are special cases that are not matched by '*' and '?'
  21. patterns.
  22. If recursive is true, the pattern '**' will match any files and
  23. zero or more directories and subdirectories.
  24. """
  25. it = _iglob(pathname, recursive, False)
  26. if recursive and _isrecursive(pathname):
  27. s = next(it) # skip empty string
  28. assert not s
  29. return it
  30. def _iglob(pathname, recursive, dironly):
  31. dirname, basename = os.path.split(pathname)
  32. if not has_magic(pathname):
  33. assert not dironly
  34. if basename:
  35. if os.path.lexists(pathname):
  36. yield pathname
  37. else:
  38. # Patterns ending with a slash should match only directories
  39. if os.path.isdir(dirname):
  40. yield pathname
  41. return
  42. if not dirname:
  43. if recursive and _isrecursive(basename):
  44. yield from _glob2(dirname, basename, dironly)
  45. else:
  46. yield from _glob1(dirname, basename, dironly)
  47. return
  48. # `os.path.split()` returns the argument itself as a dirname if it is a
  49. # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
  50. # contains magic characters (i.e. r'\\?\C:').
  51. if dirname != pathname and has_magic(dirname):
  52. dirs = _iglob(dirname, recursive, True)
  53. else:
  54. dirs = [dirname]
  55. if has_magic(basename):
  56. if recursive and _isrecursive(basename):
  57. glob_in_dir = _glob2
  58. else:
  59. glob_in_dir = _glob1
  60. else:
  61. glob_in_dir = _glob0
  62. for dirname in dirs:
  63. for name in glob_in_dir(dirname, basename, dironly):
  64. yield os.path.join(dirname, name)
# These two helper functions glob non-recursively inside a literal directory.
# They return a list of basenames. _glob1 accepts a pattern, while _glob0
# takes a literal basename (so it only has to check that it exists).
  68. def _glob1(dirname, pattern, dironly):
  69. names = list(_iterdir(dirname, dironly))
  70. if not _ishidden(pattern):
  71. names = (x for x in names if not _ishidden(x))
  72. return fnmatch.filter(names, pattern)
  73. def _glob0(dirname, basename, dironly):
  74. if not basename:
  75. # `os.path.split()` returns an empty basename for paths ending with a
  76. # directory separator. 'q*x/' should match only directories.
  77. if os.path.isdir(dirname):
  78. return [basename]
  79. else:
  80. if os.path.lexists(os.path.join(dirname, basename)):
  81. return [basename]
  82. return []
# The following functions are not public, but third-party code may use them.
  84. def glob0(dirname, pattern):
  85. return _glob0(dirname, pattern, False)
  86. def glob1(dirname, pattern):
  87. return _glob1(dirname, pattern, False)
# This helper function recursively yields relative pathnames inside a literal
# directory.
  90. def _glob2(dirname, pattern, dironly):
  91. assert _isrecursive(pattern)
  92. yield pattern[:0]
  93. yield from _rlistdir(dirname, dironly)
# If dironly is false, yields the names of all entries inside a directory.
# If dironly is true, yields only the names of directories.
  96. def _iterdir(dirname, dironly):
  97. if not dirname:
  98. if isinstance(dirname, bytes):
  99. dirname = bytes(os.curdir, 'ASCII')
  100. else:
  101. dirname = os.curdir
  102. try:
  103. with os.scandir(dirname) as it:
  104. for entry in it:
  105. try:
  106. if not dironly or entry.is_dir():
  107. yield entry.name
  108. except OSError:
  109. pass
  110. except OSError:
  111. return
# Recursively yields relative pathnames inside a literal directory.
  113. def _rlistdir(dirname, dironly):
  114. names = list(_iterdir(dirname, dironly))
  115. for x in names:
  116. if not _ishidden(x):
  117. yield x
  118. path = os.path.join(dirname, x) if dirname else x
  119. for y in _rlistdir(path, dironly):
  120. yield os.path.join(x, y)
  121. magic_check = re.compile('([*?[])')
  122. magic_check_bytes = re.compile(b'([*?[])')
  123. def has_magic(s):
  124. if isinstance(s, bytes):
  125. match = magic_check_bytes.search(s)
  126. else:
  127. match = magic_check.search(s)
  128. return match is not None
  129. def _ishidden(path):
  130. return path[0] in ('.', b'.'[0])
  131. def _isrecursive(pattern):
  132. if isinstance(pattern, bytes):
  133. return pattern == b'**'
  134. else:
  135. return pattern == '**'
  136. def escape(pathname):
  137. """Escape all special characters.
  138. """
  139. # Escaping is done by wrapping any of "*?[" between square brackets.
  140. # Metacharacters do not work in the drive part and shouldn't be escaped.
  141. drive, pathname = os.path.splitdrive(pathname)
  142. if isinstance(pathname, bytes):
  143. pathname = magic_check_bytes.sub(br'[\1]', pathname)
  144. else:
  145. pathname = magic_check.sub(r'[\1]', pathname)
  146. return drive + pathname