byext.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. #! /usr/bin/env python3
  2. """Show file statistics by extension."""
  3. import os
  4. import sys
  5. class Stats:
  6. def __init__(self):
  7. self.stats = {}
  8. def statargs(self, args):
  9. for arg in args:
  10. if os.path.isdir(arg):
  11. self.statdir(arg)
  12. elif os.path.isfile(arg):
  13. self.statfile(arg)
  14. else:
  15. sys.stderr.write("Can't find %s\n" % arg)
  16. self.addstats("<???>", "unknown", 1)
  17. def statdir(self, dir):
  18. self.addstats("<dir>", "dirs", 1)
  19. try:
  20. names = os.listdir(dir)
  21. except OSError as err:
  22. sys.stderr.write("Can't list %s: %s\n" % (dir, err))
  23. self.addstats("<dir>", "unlistable", 1)
  24. return
  25. for name in sorted(names):
  26. if name.startswith(".#"):
  27. continue # Skip CVS temp files
  28. if name.endswith("~"):
  29. continue # Skip Emacs backup files
  30. full = os.path.join(dir, name)
  31. if os.path.islink(full):
  32. self.addstats("<lnk>", "links", 1)
  33. elif os.path.isdir(full):
  34. self.statdir(full)
  35. else:
  36. self.statfile(full)
  37. def statfile(self, filename):
  38. head, ext = os.path.splitext(filename)
  39. head, base = os.path.split(filename)
  40. if ext == base:
  41. ext = "" # E.g. .cvsignore is deemed not to have an extension
  42. ext = os.path.normcase(ext)
  43. if not ext:
  44. ext = "<none>"
  45. self.addstats(ext, "files", 1)
  46. try:
  47. with open(filename, "rb") as f:
  48. data = f.read()
  49. except IOError as err:
  50. sys.stderr.write("Can't open %s: %s\n" % (filename, err))
  51. self.addstats(ext, "unopenable", 1)
  52. return
  53. self.addstats(ext, "bytes", len(data))
  54. if b'\0' in data:
  55. self.addstats(ext, "binary", 1)
  56. return
  57. if not data:
  58. self.addstats(ext, "empty", 1)
  59. # self.addstats(ext, "chars", len(data))
  60. lines = str(data, "latin-1").splitlines()
  61. self.addstats(ext, "lines", len(lines))
  62. del lines
  63. words = data.split()
  64. self.addstats(ext, "words", len(words))
  65. def addstats(self, ext, key, n):
  66. d = self.stats.setdefault(ext, {})
  67. d[key] = d.get(key, 0) + n
  68. def report(self):
  69. exts = sorted(self.stats)
  70. # Get the column keys
  71. columns = {}
  72. for ext in exts:
  73. columns.update(self.stats[ext])
  74. cols = sorted(columns)
  75. colwidth = {}
  76. colwidth["ext"] = max(map(len, exts))
  77. minwidth = 6
  78. self.stats["TOTAL"] = {}
  79. for col in cols:
  80. total = 0
  81. cw = max(minwidth, len(col))
  82. for ext in exts:
  83. value = self.stats[ext].get(col)
  84. if value is None:
  85. w = 0
  86. else:
  87. w = len("%d" % value)
  88. total += value
  89. cw = max(cw, w)
  90. cw = max(cw, len(str(total)))
  91. colwidth[col] = cw
  92. self.stats["TOTAL"][col] = total
  93. exts.append("TOTAL")
  94. for ext in exts:
  95. self.stats[ext]["ext"] = ext
  96. cols.insert(0, "ext")
  97. def printheader():
  98. for col in cols:
  99. print("%*s" % (colwidth[col], col), end=' ')
  100. print()
  101. printheader()
  102. for ext in exts:
  103. for col in cols:
  104. value = self.stats[ext].get(col, "")
  105. print("%*s" % (colwidth[col], value), end=' ')
  106. print()
  107. printheader() # Another header at the bottom
  108. def main():
  109. args = sys.argv[1:]
  110. if not args:
  111. args = [os.curdir]
  112. s = Stats()
  113. s.statargs(args)
  114. s.report()
  115. if __name__ == "__main__":
  116. main()