highlight.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. #!/usr/bin/env python3
  2. '''Add syntax highlighting to Python source code'''
  3. __author__ = 'Raymond Hettinger'
  4. import builtins
  5. import functools
  6. import html as html_module
  7. import keyword
  8. import re
  9. import tokenize
  10. #### Analyze Python Source #################################
  11. def is_builtin(s):
  12. 'Return True if s is the name of a builtin'
  13. return hasattr(builtins, s)
  14. def combine_range(lines, start, end):
  15. 'Join content from a range of lines between start and end'
  16. (srow, scol), (erow, ecol) = start, end
  17. if srow == erow:
  18. return lines[srow-1][scol:ecol], end
  19. rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]]
  20. return ''.join(rows), end
  21. def analyze_python(source):
  22. '''Generate and classify chunks of Python for syntax highlighting.
  23. Yields tuples in the form: (category, categorized_text).
  24. '''
  25. lines = source.splitlines(True)
  26. lines.append('')
  27. readline = functools.partial(next, iter(lines), '')
  28. kind = tok_str = ''
  29. tok_type = tokenize.COMMENT
  30. written = (1, 0)
  31. for tok in tokenize.generate_tokens(readline):
  32. prev_tok_type, prev_tok_str = tok_type, tok_str
  33. tok_type, tok_str, (srow, scol), (erow, ecol), logical_lineno = tok
  34. kind = ''
  35. if tok_type == tokenize.COMMENT:
  36. kind = 'comment'
  37. elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;@':
  38. kind = 'operator'
  39. elif tok_type == tokenize.STRING:
  40. kind = 'string'
  41. if prev_tok_type == tokenize.INDENT or scol==0:
  42. kind = 'docstring'
  43. elif tok_type == tokenize.NAME:
  44. if tok_str in ('def', 'class', 'import', 'from'):
  45. kind = 'definition'
  46. elif prev_tok_str in ('def', 'class'):
  47. kind = 'defname'
  48. elif keyword.iskeyword(tok_str):
  49. kind = 'keyword'
  50. elif is_builtin(tok_str) and prev_tok_str != '.':
  51. kind = 'builtin'
  52. if kind:
  53. text, written = combine_range(lines, written, (srow, scol))
  54. yield '', text
  55. text, written = tok_str, (erow, ecol)
  56. yield kind, text
  57. line_upto_token, written = combine_range(lines, written, (erow, ecol))
  58. yield '', line_upto_token
  59. #### Raw Output ###########################################
  60. def raw_highlight(classified_text):
  61. 'Straight text display of text classifications'
  62. result = []
  63. for kind, text in classified_text:
  64. result.append('%15s: %r\n' % (kind or 'plain', text))
  65. return ''.join(result)
  66. #### ANSI Output ###########################################
  67. default_ansi = {
  68. 'comment': ('\033[0;31m', '\033[0m'),
  69. 'string': ('\033[0;32m', '\033[0m'),
  70. 'docstring': ('\033[0;32m', '\033[0m'),
  71. 'keyword': ('\033[0;33m', '\033[0m'),
  72. 'builtin': ('\033[0;35m', '\033[0m'),
  73. 'definition': ('\033[0;33m', '\033[0m'),
  74. 'defname': ('\033[0;34m', '\033[0m'),
  75. 'operator': ('\033[0;33m', '\033[0m'),
  76. }
  77. def ansi_highlight(classified_text, colors=default_ansi):
  78. 'Add syntax highlighting to source code using ANSI escape sequences'
  79. # http://en.wikipedia.org/wiki/ANSI_escape_code
  80. result = []
  81. for kind, text in classified_text:
  82. opener, closer = colors.get(kind, ('', ''))
  83. result += [opener, text, closer]
  84. return ''.join(result)
  85. #### HTML Output ###########################################
  86. def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
  87. 'Convert classified text to an HTML fragment'
  88. result = [opener]
  89. for kind, text in classified_text:
  90. if kind:
  91. result.append('<span class="%s">' % kind)
  92. result.append(html_module.escape(text))
  93. if kind:
  94. result.append('</span>')
  95. result.append(closer)
  96. return ''.join(result)
  97. default_css = {
  98. '.comment': '{color: crimson;}',
  99. '.string': '{color: forestgreen;}',
  100. '.docstring': '{color: forestgreen; font-style:italic;}',
  101. '.keyword': '{color: darkorange;}',
  102. '.builtin': '{color: purple;}',
  103. '.definition': '{color: darkorange; font-weight:bold;}',
  104. '.defname': '{color: blue;}',
  105. '.operator': '{color: brown;}',
  106. }
  107. default_html = '''\
  108. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
  109. "http://www.w3.org/TR/html4/strict.dtd">
  110. <html>
  111. <head>
  112. <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
  113. <title> {title} </title>
  114. <style type="text/css">
  115. {css}
  116. </style>
  117. </head>
  118. <body>
  119. {body}
  120. </body>
  121. </html>
  122. '''
  123. def build_html_page(classified_text, title='python',
  124. css=default_css, html=default_html):
  125. 'Create a complete HTML page with colorized source code'
  126. css_str = '\n'.join(['%s %s' % item for item in css.items()])
  127. result = html_highlight(classified_text)
  128. title = html_module.escape(title)
  129. return html.format(title=title, css=css_str, body=result)
  130. #### LaTeX Output ##########################################
  131. default_latex_commands = {
  132. 'comment': r'{\color{red}#1}',
  133. 'string': r'{\color{ForestGreen}#1}',
  134. 'docstring': r'{\emph{\color{ForestGreen}#1}}',
  135. 'keyword': r'{\color{orange}#1}',
  136. 'builtin': r'{\color{purple}#1}',
  137. 'definition': r'{\color{orange}#1}',
  138. 'defname': r'{\color{blue}#1}',
  139. 'operator': r'{\color{brown}#1}',
  140. }
  141. default_latex_document = r'''
  142. \documentclass{article}
  143. \usepackage{alltt}
  144. \usepackage{upquote}
  145. \usepackage{color}
  146. \usepackage[usenames,dvipsnames]{xcolor}
  147. \usepackage[cm]{fullpage}
  148. %(macros)s
  149. \begin{document}
  150. \center{\LARGE{%(title)s}}
  151. \begin{alltt}
  152. %(body)s
  153. \end{alltt}
  154. \end{document}
  155. '''
  156. def alltt_escape(s):
  157. 'Replace backslash and braces with their escaped equivalents'
  158. xlat = {'{': r'\{', '}': r'\}', '\\': r'\textbackslash{}'}
  159. return re.sub(r'[\\{}]', lambda mo: xlat[mo.group()], s)
  160. def latex_highlight(classified_text, title = 'python',
  161. commands = default_latex_commands,
  162. document = default_latex_document):
  163. 'Create a complete LaTeX document with colorized source code'
  164. macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
  165. result = []
  166. for kind, text in classified_text:
  167. if kind:
  168. result.append(r'\py%s{' % kind)
  169. result.append(alltt_escape(text))
  170. if kind:
  171. result.append('}')
  172. return default_latex_document % dict(title=title, macros=macros, body=''.join(result))
  173. if __name__ == '__main__':
  174. import argparse
  175. import os.path
  176. import sys
  177. import textwrap
  178. import webbrowser
  179. parser = argparse.ArgumentParser(
  180. description = 'Add syntax highlighting to Python source code',
  181. formatter_class=argparse.RawDescriptionHelpFormatter,
  182. epilog = textwrap.dedent('''
  183. examples:
  184. # Show syntax highlighted code in the terminal window
  185. $ ./highlight.py myfile.py
  186. # Colorize myfile.py and display in a browser
  187. $ ./highlight.py -b myfile.py
  188. # Create an HTML section to embed in an existing webpage
  189. ./highlight.py -s myfile.py
  190. # Create a complete HTML file
  191. $ ./highlight.py -c myfile.py > myfile.html
  192. # Create a PDF using LaTeX
  193. $ ./highlight.py -l myfile.py | pdflatex
  194. '''))
  195. parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
  196. help = 'file containing Python sourcecode')
  197. parser.add_argument('-b', '--browser', action = 'store_true',
  198. help = 'launch a browser to show results')
  199. parser.add_argument('-c', '--complete', action = 'store_true',
  200. help = 'build a complete html webpage')
  201. parser.add_argument('-l', '--latex', action = 'store_true',
  202. help = 'build a LaTeX document')
  203. parser.add_argument('-r', '--raw', action = 'store_true',
  204. help = 'raw parse of categorized text')
  205. parser.add_argument('-s', '--section', action = 'store_true',
  206. help = 'show an HTML section rather than a complete webpage')
  207. args = parser.parse_args()
  208. if args.section and (args.browser or args.complete):
  209. parser.error('The -s/--section option is incompatible with '
  210. 'the -b/--browser or -c/--complete options')
  211. sourcefile = args.sourcefile
  212. with open(sourcefile) as f:
  213. source = f.read()
  214. classified_text = analyze_python(source)
  215. if args.raw:
  216. encoded = raw_highlight(classified_text)
  217. elif args.complete or args.browser:
  218. encoded = build_html_page(classified_text, title=sourcefile)
  219. elif args.section:
  220. encoded = html_highlight(classified_text)
  221. elif args.latex:
  222. encoded = latex_highlight(classified_text, title=sourcefile)
  223. else:
  224. encoded = ansi_highlight(classified_text)
  225. if args.browser:
  226. htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
  227. with open(htmlfile, 'w') as f:
  228. f.write(encoded)
  229. webbrowser.open('file://' + os.path.abspath(htmlfile))
  230. else:
  231. sys.stdout.write(encoded)