pysource.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. #!/usr/bin/env python3
  2. """\
  3. List python source files.
  4. There are three functions to check whether a file is a Python source, listed
  5. here with increasing complexity:
  6. - has_python_ext() checks whether a file name ends in '.py[w]'.
  7. - looks_like_python() checks whether the file is not binary and either has
  8. the '.py[w]' extension or the first line contains the word 'python'.
  9. - can_be_compiled() checks whether the file can be compiled by compile().
  10. The file also must be of appropriate size - not bigger than a megabyte.
  11. walk_python_files() recursively lists all Python files under the given directories.
  12. """
  13. __author__ = "Oleg Broytmann, Georg Brandl"
  14. __all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]
  15. import os, re
# Matches any control byte in the ranges 0x00-0x08, 0x0E-0x1F, or 0x7F.
# The bytes 0x09-0x0D (tab, LF, VT, FF, CR) are deliberately not matched.
# looks_like_python() treats a first line containing a match as binary data.
binary_re = re.compile(br'[\x00-\x08\x0E-\x1F\x7F]')
# When true, print_debug() prints its messages; when false it stays silent.
debug = False
  18. def print_debug(msg):
  19. if debug: print(msg)
  20. def _open(fullpath):
  21. try:
  22. size = os.stat(fullpath).st_size
  23. except OSError as err: # Permission denied - ignore the file
  24. print_debug("%s: permission denied: %s" % (fullpath, err))
  25. return None
  26. if size > 1024*1024: # too big
  27. print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
  28. return None
  29. try:
  30. return open(fullpath, "rb")
  31. except IOError as err: # Access denied, or a special file - ignore it
  32. print_debug("%s: access denied: %s" % (fullpath, err))
  33. return None
  34. def has_python_ext(fullpath):
  35. return fullpath.endswith(".py") or fullpath.endswith(".pyw")
  36. def looks_like_python(fullpath):
  37. infile = _open(fullpath)
  38. if infile is None:
  39. return False
  40. with infile:
  41. line = infile.readline()
  42. if binary_re.search(line):
  43. # file appears to be binary
  44. print_debug("%s: appears to be binary" % fullpath)
  45. return False
  46. if fullpath.endswith(".py") or fullpath.endswith(".pyw"):
  47. return True
  48. elif b"python" in line:
  49. # disguised Python script (e.g. CGI)
  50. return True
  51. return False
  52. def can_be_compiled(fullpath):
  53. infile = _open(fullpath)
  54. if infile is None:
  55. return False
  56. with infile:
  57. code = infile.read()
  58. try:
  59. compile(code, fullpath, "exec")
  60. except Exception as err:
  61. print_debug("%s: cannot compile: %s" % (fullpath, err))
  62. return False
  63. return True
  64. def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
  65. """\
  66. Recursively yield all Python source files below the given paths.
  67. paths: a list of files and/or directories to be checked.
  68. is_python: a function that takes a file name and checks whether it is a
  69. Python source file
  70. exclude_dirs: a list of directory base names that should be excluded in
  71. the search
  72. """
  73. if exclude_dirs is None:
  74. exclude_dirs=[]
  75. for path in paths:
  76. print_debug("testing: %s" % path)
  77. if os.path.isfile(path):
  78. if is_python(path):
  79. yield path
  80. elif os.path.isdir(path):
  81. print_debug(" it is a directory")
  82. for dirpath, dirnames, filenames in os.walk(path):
  83. for exclude in exclude_dirs:
  84. if exclude in dirnames:
  85. dirnames.remove(exclude)
  86. for filename in filenames:
  87. fullpath = os.path.join(dirpath, filename)
  88. print_debug("testing: %s" % fullpath)
  89. if is_python(fullpath):
  90. yield fullpath
  91. else:
  92. print_debug(" unknown type")
  93. if __name__ == "__main__":
  94. # Two simple examples/tests
  95. for fullpath in walk_python_files(['.']):
  96. print(fullpath)
  97. print("----------")
  98. for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
  99. print(fullpath)