pindent.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506
  1. #! /usr/bin/env python3
  2. # This file contains a class and a main program that perform three
  3. # related (though complementary) formatting operations on Python
  4. # programs. When called as "pindent -c", it takes a valid Python
  5. # program as input and outputs a version augmented with block-closing
  6. # comments. When called as "pindent -d", it assumes its input is a
  7. # Python program with block-closing comments and outputs a commentless
  8. # version. When called as "pindent -r" it assumes its input is a
  9. # Python program with block-closing comments but with its indentation
  10. # messed up, and outputs a properly indented version.
  11. # A "block-closing comment" is a comment of the form '# end <keyword>'
  12. # where <keyword> is the keyword that opened the block. If the
  13. # opening keyword is 'def' or 'class', the function or class name may
  14. # be repeated in the block-closing comment as well. Here is an
  15. # example of a program fully augmented with block-closing comments:
  16. #     def foobar(a, b):
  17. #         if a == b:
  18. #             a = a+1
  19. #         elif a < b:
  20. #             b = b-1
  21. #             if b > a: a = a-1
  22. #             # end if
  23. #         else:
  24. #             print 'oops!'
  25. #         # end if
  26. #     # end def foobar
  27. # Note that only the last part of an if...elif...else... block needs a
  28. # block-closing comment; the same is true for other compound
  29. # statements (e.g. try...except). Also note that "short-form" blocks
  30. # like the second 'if' in the example must be closed as well;
  31. # otherwise the 'else' in the example would be ambiguous (remember
  32. # that indentation is not significant when interpreting block-closing
  33. # comments).
  34. # The operations are idempotent (i.e. applied to their own output
  35. # they yield an identical result). Running first "pindent -c" and
  36. # then "pindent -r" on a valid Python program produces a program that
  37. # is semantically identical to the input (though its indentation may
  38. # be different). Running "pindent -d" on that output produces a
  39. # program that only differs from the original in indentation.
  40. # Other options:
  41. # -s stepsize: set the indentation step size (default 8)
  42. # -t tabsize : set the number of spaces a tab character is worth (default 8)
  43. # -e : expand TABs into spaces
  44. # file ... : input file(s) (default standard input)
  45. # The results always go to standard output
  46. # Caveats:
  47. # - comments ending in a backslash will be mistaken for continued lines
  48. # - continuations using backslash are always left unchanged
  49. # - continuations inside parentheses are not extra indented by -r
  50. # but must be indented for -c to work correctly (this breaks
  51. # idempotency!)
  52. # - continued lines inside triple-quoted strings are totally garbled
  53. # Secret feature:
  54. # - On input, a block may also be closed with an "end statement" --
  55. # this is a block-closing comment without the '#' sign.
  56. # Possible improvements:
  57. # - check syntax based on transitions in 'next' table
  58. # - better error reporting
  59. # - better error recovery
  60. # - check identifier after class/def
  61. # The following wishes need a more complete tokenization of the source:
  62. # - Don't get fooled by comments ending in backslash
  63. # - reindent continuation lines indicated by backslash
  64. # - handle continuation lines inside parentheses/braces/brackets
  65. # - handle triple quoted strings spanning lines
  66. # - realign comments
  67. # - optionally do much more thorough reformatting, a la C indent
  68. # Defaults
  69. STEPSIZE = 8
  70. TABSIZE = 8
  71. EXPANDTABS = False
  72. import io
  73. import re
  74. import sys
  75. next = {}
  76. next['if'] = next['elif'] = 'elif', 'else', 'end'
  77. next['while'] = next['for'] = 'else', 'end'
  78. next['try'] = 'except', 'finally'
  79. next['except'] = 'except', 'else', 'finally', 'end'
  80. next['else'] = next['finally'] = next['with'] = \
  81. next['def'] = next['class'] = 'end'
  82. next['end'] = ()
  83. start = 'if', 'while', 'for', 'try', 'with', 'def', 'class'
  84. class PythonIndenter:
  85. def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
  86. indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  87. self.fpi = fpi
  88. self.fpo = fpo
  89. self.indentsize = indentsize
  90. self.tabsize = tabsize
  91. self.lineno = 0
  92. self.expandtabs = expandtabs
  93. self._write = fpo.write
  94. self.kwprog = re.compile(
  95. r'^(?:\s|\\\n)*(?P<kw>[a-z]+)'
  96. r'((?:\s|\\\n)+(?P<id>[a-zA-Z_]\w*))?'
  97. r'[^\w]')
  98. self.endprog = re.compile(
  99. r'^(?:\s|\\\n)*#?\s*end\s+(?P<kw>[a-z]+)'
  100. r'(\s+(?P<id>[a-zA-Z_]\w*))?'
  101. r'[^\w]')
  102. self.wsprog = re.compile(r'^[ \t]*')
  103. # end def __init__
  104. def write(self, line):
  105. if self.expandtabs:
  106. self._write(line.expandtabs(self.tabsize))
  107. else:
  108. self._write(line)
  109. # end if
  110. # end def write
  111. def readline(self):
  112. line = self.fpi.readline()
  113. if line: self.lineno += 1
  114. # end if
  115. return line
  116. # end def readline
  117. def error(self, fmt, *args):
  118. if args: fmt = fmt % args
  119. # end if
  120. sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
  121. self.write('### %s ###\n' % fmt)
  122. # end def error
  123. def getline(self):
  124. line = self.readline()
  125. while line[-2:] == '\\\n':
  126. line2 = self.readline()
  127. if not line2: break
  128. # end if
  129. line += line2
  130. # end while
  131. return line
  132. # end def getline
  133. def putline(self, line, indent):
  134. tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
  135. i = self.wsprog.match(line).end()
  136. line = line[i:]
  137. if line[:1] not in ('\n', '\r', ''):
  138. line = '\t'*tabs + ' '*spaces + line
  139. # end if
  140. self.write(line)
  141. # end def putline
  142. def reformat(self):
  143. stack = []
  144. while True:
  145. line = self.getline()
  146. if not line: break # EOF
  147. # end if
  148. m = self.endprog.match(line)
  149. if m:
  150. kw = 'end'
  151. kw2 = m.group('kw')
  152. if not stack:
  153. self.error('unexpected end')
  154. elif stack.pop()[0] != kw2:
  155. self.error('unmatched end')
  156. # end if
  157. self.putline(line, len(stack))
  158. continue
  159. # end if
  160. m = self.kwprog.match(line)
  161. if m:
  162. kw = m.group('kw')
  163. if kw in start:
  164. self.putline(line, len(stack))
  165. stack.append((kw, kw))
  166. continue
  167. # end if
  168. if kw in next and stack:
  169. self.putline(line, len(stack)-1)
  170. kwa, kwb = stack[-1]
  171. stack[-1] = kwa, kw
  172. continue
  173. # end if
  174. # end if
  175. self.putline(line, len(stack))
  176. # end while
  177. if stack:
  178. self.error('unterminated keywords')
  179. for kwa, kwb in stack:
  180. self.write('\t%s\n' % kwa)
  181. # end for
  182. # end if
  183. # end def reformat
  184. def delete(self):
  185. begin_counter = 0
  186. end_counter = 0
  187. while True:
  188. line = self.getline()
  189. if not line: break # EOF
  190. # end if
  191. m = self.endprog.match(line)
  192. if m:
  193. end_counter += 1
  194. continue
  195. # end if
  196. m = self.kwprog.match(line)
  197. if m:
  198. kw = m.group('kw')
  199. if kw in start:
  200. begin_counter += 1
  201. # end if
  202. # end if
  203. self.write(line)
  204. # end while
  205. if begin_counter - end_counter < 0:
  206. sys.stderr.write('Warning: input contained more end tags than expected\n')
  207. elif begin_counter - end_counter > 0:
  208. sys.stderr.write('Warning: input contained less end tags than expected\n')
  209. # end if
  210. # end def delete
  211. def complete(self):
  212. stack = []
  213. todo = []
  214. currentws = thisid = firstkw = lastkw = topid = ''
  215. while True:
  216. line = self.getline()
  217. i = self.wsprog.match(line).end()
  218. m = self.endprog.match(line)
  219. if m:
  220. thiskw = 'end'
  221. endkw = m.group('kw')
  222. thisid = m.group('id')
  223. else:
  224. m = self.kwprog.match(line)
  225. if m:
  226. thiskw = m.group('kw')
  227. if thiskw not in next:
  228. thiskw = ''
  229. # end if
  230. if thiskw in ('def', 'class'):
  231. thisid = m.group('id')
  232. else:
  233. thisid = ''
  234. # end if
  235. elif line[i:i+1] in ('\n', '#'):
  236. todo.append(line)
  237. continue
  238. else:
  239. thiskw = ''
  240. # end if
  241. # end if
  242. indentws = line[:i]
  243. indent = len(indentws.expandtabs(self.tabsize))
  244. current = len(currentws.expandtabs(self.tabsize))
  245. while indent < current:
  246. if firstkw:
  247. if topid:
  248. s = '# end %s %s\n' % (
  249. firstkw, topid)
  250. else:
  251. s = '# end %s\n' % firstkw
  252. # end if
  253. self.write(currentws + s)
  254. firstkw = lastkw = ''
  255. # end if
  256. currentws, firstkw, lastkw, topid = stack.pop()
  257. current = len(currentws.expandtabs(self.tabsize))
  258. # end while
  259. if indent == current and firstkw:
  260. if thiskw == 'end':
  261. if endkw != firstkw:
  262. self.error('mismatched end')
  263. # end if
  264. firstkw = lastkw = ''
  265. elif not thiskw or thiskw in start:
  266. if topid:
  267. s = '# end %s %s\n' % (
  268. firstkw, topid)
  269. else:
  270. s = '# end %s\n' % firstkw
  271. # end if
  272. self.write(currentws + s)
  273. firstkw = lastkw = topid = ''
  274. # end if
  275. # end if
  276. if indent > current:
  277. stack.append((currentws, firstkw, lastkw, topid))
  278. if thiskw and thiskw not in start:
  279. # error
  280. thiskw = ''
  281. # end if
  282. currentws, firstkw, lastkw, topid = \
  283. indentws, thiskw, thiskw, thisid
  284. # end if
  285. if thiskw:
  286. if thiskw in start:
  287. firstkw = lastkw = thiskw
  288. topid = thisid
  289. else:
  290. lastkw = thiskw
  291. # end if
  292. # end if
  293. for l in todo: self.write(l)
  294. # end for
  295. todo = []
  296. if not line: break
  297. # end if
  298. self.write(line)
  299. # end while
  300. # end def complete
  301. # end class PythonIndenter
  302. # Simplified user interface
  303. # - xxx_filter(input, output): read and write file objects
  304. # - xxx_string(s): take and return string object
  305. # - xxx_file(filename): process file in place, return true iff changed
  306. def complete_filter(input = sys.stdin, output = sys.stdout,
  307. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  308. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  309. pi.complete()
  310. # end def complete_filter
  311. def delete_filter(input= sys.stdin, output = sys.stdout,
  312. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  313. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  314. pi.delete()
  315. # end def delete_filter
  316. def reformat_filter(input = sys.stdin, output = sys.stdout,
  317. stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  318. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  319. pi.reformat()
  320. # end def reformat_filter
  321. def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  322. input = io.StringIO(source)
  323. output = io.StringIO()
  324. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  325. pi.complete()
  326. return output.getvalue()
  327. # end def complete_string
  328. def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  329. input = io.StringIO(source)
  330. output = io.StringIO()
  331. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  332. pi.delete()
  333. return output.getvalue()
  334. # end def delete_string
  335. def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  336. input = io.StringIO(source)
  337. output = io.StringIO()
  338. pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs)
  339. pi.reformat()
  340. return output.getvalue()
  341. # end def reformat_string
  342. def make_backup(filename):
  343. import os, os.path
  344. backup = filename + '~'
  345. if os.path.lexists(backup):
  346. try:
  347. os.remove(backup)
  348. except OSError:
  349. print("Can't remove backup %r" % (backup,), file=sys.stderr)
  350. # end try
  351. # end if
  352. try:
  353. os.rename(filename, backup)
  354. except OSError:
  355. print("Can't rename %r to %r" % (filename, backup), file=sys.stderr)
  356. # end try
  357. # end def make_backup
  358. def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  359. with open(filename, 'r') as f:
  360. source = f.read()
  361. # end with
  362. result = complete_string(source, stepsize, tabsize, expandtabs)
  363. if source == result: return 0
  364. # end if
  365. make_backup(filename)
  366. with open(filename, 'w') as f:
  367. f.write(result)
  368. # end with
  369. return 1
  370. # end def complete_file
  371. def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  372. with open(filename, 'r') as f:
  373. source = f.read()
  374. # end with
  375. result = delete_string(source, stepsize, tabsize, expandtabs)
  376. if source == result: return 0
  377. # end if
  378. make_backup(filename)
  379. with open(filename, 'w') as f:
  380. f.write(result)
  381. # end with
  382. return 1
  383. # end def delete_file
  384. def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
  385. with open(filename, 'r') as f:
  386. source = f.read()
  387. # end with
  388. result = reformat_string(source, stepsize, tabsize, expandtabs)
  389. if source == result: return 0
  390. # end if
  391. make_backup(filename)
  392. with open(filename, 'w') as f:
  393. f.write(result)
  394. # end with
  395. return 1
  396. # end def reformat_file
  397. # Test program when called as a script
  398. usage = """
  399. usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
  400. -c : complete a correctly indented program (add #end directives)
  401. -d : delete #end directives
  402. -r : reformat a completed program (use #end directives)
  403. -s stepsize: indentation step (default %(STEPSIZE)d)
  404. -t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
  405. -e : expand TABs into spaces (default OFF)
  406. [file] ... : files are changed in place, with backups in file~
  407. If no files are specified or a single - is given,
  408. the program acts as a filter (reads stdin, writes stdout).
  409. """ % vars()
  410. def error_both(op1, op2):
  411. sys.stderr.write('Error: You can not specify both '+op1+' and -'+op2[0]+' at the same time\n')
  412. sys.stderr.write(usage)
  413. sys.exit(2)
  414. # end def error_both
  415. def test():
  416. import getopt
  417. try:
  418. opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
  419. except getopt.error as msg:
  420. sys.stderr.write('Error: %s\n' % msg)
  421. sys.stderr.write(usage)
  422. sys.exit(2)
  423. # end try
  424. action = None
  425. stepsize = STEPSIZE
  426. tabsize = TABSIZE
  427. expandtabs = EXPANDTABS
  428. for o, a in opts:
  429. if o == '-c':
  430. if action: error_both(o, action)
  431. # end if
  432. action = 'complete'
  433. elif o == '-d':
  434. if action: error_both(o, action)
  435. # end if
  436. action = 'delete'
  437. elif o == '-r':
  438. if action: error_both(o, action)
  439. # end if
  440. action = 'reformat'
  441. elif o == '-s':
  442. stepsize = int(a)
  443. elif o == '-t':
  444. tabsize = int(a)
  445. elif o == '-e':
  446. expandtabs = True
  447. # end if
  448. # end for
  449. if not action:
  450. sys.stderr.write(
  451. 'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
  452. sys.stderr.write(usage)
  453. sys.exit(2)
  454. # end if
  455. if not args or args == ['-']:
  456. action = eval(action + '_filter')
  457. action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
  458. else:
  459. action = eval(action + '_file')
  460. for filename in args:
  461. action(filename, stepsize, tabsize, expandtabs)
  462. # end for
  463. # end if
  464. # end def test
  465. if __name__ == '__main__':
  466. test()
  467. # end if