# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# Modifications:
# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Parser driver.

This provides a high-level interface to parse a file into a syntax tree.

"""

__author__ = "Guido van Rossum <guido@python.org>"

__all__ = ["Driver", "load_grammar"]

# Python imports
import codecs
import io
import os
import logging
import pkgutil
import sys

# Pgen imports
from . import grammar, parse, token, tokenize, pgen


class Driver(object):

    def __init__(self, grammar, convert=None, logger=None):
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        lineno = 1
        column = 0
        type = value = start = end = line_text = None
        prefix = ""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(io.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)
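

# --- Illustrative usage sketch (not part of the original module). ---
# Shows the usual way to combine load_grammar() with Driver.  The
# lib2to3.pytree import and pytree.convert are assumptions about the
# surrounding package layout; pass convert=None to get raw tuple nodes.
def _example_parse_string():
    """Parse a tiny module and return its syntax tree (demo only)."""
    from lib2to3 import pytree  # assumed to be importable alongside this file
    drv = Driver(load_grammar(), convert=pytree.convert)
    return drv.parse_string("x = 1\n")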


def _generate_pickle_name(gt):
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
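
# For example, with sys.version_info == (3, 8, 0, "final", 0),
# _generate_pickle_name("Grammar.txt") returns "Grammar3.8.0.final.0.pickle":
# the ".txt" suffix is dropped and the interpreter version is embedded so
# cached tables from different Python versions cannot collide.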


def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger()
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            except OSError as e:
                logger.info("Writing failed: %s", e)
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g
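

# Illustrative sketch (demo only): force a rebuild of the parse tables and
# skip the pickle cache.  "MyGrammar.txt" is a hypothetical grammar file;
# the save/force keywords come from load_grammar's signature above.
def _example_force_regenerate():
    return load_grammar("MyGrammar.txt", save=False, force=True)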


def _newer(a, b):
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
    if not os.path.exists(b):
        return True
    return os.path.getmtime(a) >= os.path.getmtime(b)
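
# Note: _newer(a, b) treats a missing *a* as "not newer" (forcing
# regeneration in load_grammar above) and a missing *b* as "newer"
# (keeping the existing pickle).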


def load_packaged_grammar(package, grammar_source):
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
    Python version and using a ``.pickle`` extension.

    However, if *grammar_source* is an extant file, load_grammar(grammar_source)
    is called instead. This facilitates using a packaged grammar file when needed
    but preserves load_grammar's automatic regeneration behavior when possible.

    """
    if os.path.isfile(grammar_source):
        return load_grammar(grammar_source)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source))
    data = pkgutil.get_data(package, pickled_name)
    g = grammar.Grammar()
    g.loads(data)
    return g
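

# Illustrative sketch (demo only): load the grammar bundled inside a package
# via pkgutil.  The names "lib2to3" and "Grammar.txt" are examples; any
# package shipping the pickled tables as package data would work the same way.
def _example_load_packaged():
    return load_packaged_grammar("lib2to3", "Grammar.txt")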


def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = sys.argv[1:]
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for gt in args:
        load_grammar(gt, save=True, force=True)
    return True

if __name__ == "__main__":
    sys.exit(int(not main()))