findnocoding.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. #!/usr/bin/env python3
  2. """List all those Python files that require a coding directive
  3. Usage: findnocoding.py dir1 [dir2...]
  4. """
  5. __author__ = "Oleg Broytmann, Georg Brandl"
  6. import sys, os, re, getopt
  7. # our pysource module finds Python source files
  8. try:
  9. import pysource
  10. except ImportError:
  11. # emulate the module with a simple os.walk
  12. class pysource:
  13. has_python_ext = looks_like_python = can_be_compiled = None
  14. def walk_python_files(self, paths, *args, **kwargs):
  15. for path in paths:
  16. if os.path.isfile(path):
  17. yield path.endswith(".py")
  18. elif os.path.isdir(path):
  19. for root, dirs, files in os.walk(path):
  20. for filename in files:
  21. if filename.endswith(".py"):
  22. yield os.path.join(root, filename)
  23. pysource = pysource()
  24. print("The pysource module is not available; "
  25. "no sophisticated Python source file search will be done.", file=sys.stderr)
  26. decl_re = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
  27. blank_re = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)')
  28. def get_declaration(line):
  29. match = decl_re.match(line)
  30. if match:
  31. return match.group(1)
  32. return b''
  33. def has_correct_encoding(text, codec):
  34. try:
  35. str(text, codec)
  36. except UnicodeDecodeError:
  37. return False
  38. else:
  39. return True
  40. def needs_declaration(fullpath):
  41. try:
  42. infile = open(fullpath, 'rb')
  43. except IOError: # Oops, the file was removed - ignore it
  44. return None
  45. with infile:
  46. line1 = infile.readline()
  47. line2 = infile.readline()
  48. if (get_declaration(line1) or
  49. blank_re.match(line1) and get_declaration(line2)):
  50. # the file does have an encoding declaration, so trust it
  51. return False
  52. # check the whole file for non utf-8 characters
  53. rest = infile.read()
  54. if has_correct_encoding(line1+line2+rest, "utf-8"):
  55. return False
  56. return True
  57. usage = """Usage: %s [-cd] paths...
  58. -c: recognize Python source files trying to compile them
  59. -d: debug output""" % sys.argv[0]
  60. if __name__ == '__main__':
  61. try:
  62. opts, args = getopt.getopt(sys.argv[1:], 'cd')
  63. except getopt.error as msg:
  64. print(msg, file=sys.stderr)
  65. print(usage, file=sys.stderr)
  66. sys.exit(1)
  67. is_python = pysource.looks_like_python
  68. debug = False
  69. for o, a in opts:
  70. if o == '-c':
  71. is_python = pysource.can_be_compiled
  72. elif o == '-d':
  73. debug = True
  74. if not args:
  75. print(usage, file=sys.stderr)
  76. sys.exit(1)
  77. for fullpath in pysource.walk_python_files(args, is_python):
  78. if debug:
  79. print("Testing for coding: %s" % fullpath)
  80. result = needs_declaration(fullpath)
  81. if result:
  82. print(fullpath)