Change the pgen2 parser and its driver so that they can be accessed easily from the outside. This is a minor change and will allow Jedi's tokenizer to work with pgen2.

Dave Halter
2014-11-26 15:05:36 +01:00
parent cd1e07a532
commit 427056a22d
2 changed files with 23 additions and 12 deletions


@@ -31,15 +31,16 @@ class Driver(object):
         self.error_recovery = error_recovery
 
     def parse_tokens(self, tokens):
+        p = parse.Parser(self.grammar, self.convert_node, self.convert_leaf, self.error_recovery)
+        return p.parse(self._tokenize(tokens))
+
+    def _tokenize(self, tokens):
         """Parse a series of tokens and return the syntax tree."""
         # XXX Move the prefix computation into a wrapper around tokenize.
-        p = parse.Parser(self.grammar, self.convert_node, self.convert_leaf, self.error_recovery)
         lineno = 1
         column = 0
-        type = value = start = end = line_text = None
         prefix = ""
-        for quintuple in tokens:
-            type, value, start, end, line_text = quintuple
+        for type, value, start, end, line_text in tokens:
             if start != (lineno, column):
                 assert (lineno, column) <= start, ((lineno, column), start)
                 s_lineno, s_column = start
@@ -60,18 +61,12 @@ class Driver(object):
             if type == token.OP:
                 type = grammar.opmap[value]
             #self.logger.debug("%s %r (prefix=%r)", token.tok_name[type], value, prefix)
-            if p.addtoken(type, value, prefix, start):
-                break
+            yield type, value, prefix, start
             prefix = ""
             lineno, column = end
             if value.endswith("\n"):
                 lineno += 1
                 column = 0
-        else:
-            # We never broke out -- EOF is too soon (how can this happen???)
-            raise parse.ParseError("incomplete input",
-                                   type, value, (prefix, start))
-        return p.rootnode
 
     def parse_string(self, text):
         """Parse a string and return the syntax tree."""

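Taken together, the two hunks above split the old monolithic parse_tokens() into a thin entry point plus a generator: the driver no longer calls addtoken() itself, it only annotates tokens and hands them to the parser. Assembled from the added lines (with the prefix and line/column bookkeeping in the loop body elided), the new driver side reads roughly like this; the addtoken() loop it used to contain now lives in Parser.parse() in the second file below.

    # Inside class Driver (assembled from the hunks above; bookkeeping elided).
    def parse_tokens(self, tokens):
        p = parse.Parser(self.grammar, self.convert_node, self.convert_leaf,
                         self.error_recovery)
        return p.parse(self._tokenize(tokens))

    def _tokenize(self, tokens):
        # Generator: pairs each raw tokenizer quintuple with the prefix
        # (whitespace/comments) accumulated since the previous token.
        lineno = 1
        column = 0
        prefix = ""
        for type, value, start, end, line_text in tokens:
            # ... prefix and line/column bookkeeping as before ...
            yield type, value, prefix, start
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0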

@@ -97,10 +97,26 @@ class Parser(object):
         self.rootnode = None
         self.error_recovery = error_recovery
 
+    def parse(self, tokens):
+        for type, value, prefix, start_pos in tokens:
+            if self.addtoken(type, value, prefix, start_pos):
+                break
+        else:
+            # We never broke out -- EOF is too soon (how can this happen???)
+            # Hint: It probably doesn't since there's an ENDMARKER.
+            raise ParseError("incomplete input", type, value, start_pos)
+        return self.rootnode
+
     def addtoken(self, type, value, prefix, start_pos):
         """Add a token; return True iff this is the end of the program."""
         # Map from token to label
-        ilabel = self.classify(type, value, start_pos)
+        try:
+            ilabel = self.classify(type, value, start_pos)
+        except ParseError:
+            # Currently we ignore tokens like `?`.
+            print('invalid token', token.tok_name[type], value)
+            return
         # Loop until the token is shifted; may raise exceptions
         while True:
             dfa, state, node = self.stack[-1]
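With Parser.parse() public and classify() failures caught inside addtoken(), the parser can be driven from outside the driver: any iterable of (type, value, prefix, start_pos) tuples, for example one produced by Jedi's own tokenizer, can be fed to it directly, which is the point of the commit message above. A minimal sketch (the helper name and the surrounding setup are illustrative assumptions, not part of this commit):

    # Hypothetical helper, not from this commit: mirrors what the new
    # Driver.parse_tokens() does, but with a caller-supplied token stream
    # instead of Driver._tokenize().
    def parse_with_external_tokens(grammar, convert_node, convert_leaf,
                                   error_recovery, annotated_tokens):
        p = parse.Parser(grammar, convert_node, convert_leaf, error_recovery)
        return p.parse(annotated_tokens)

    # Usage, assuming a loaded pgen2 grammar, converter callbacks and a
    # tokenizer yielding (type, value, prefix, start_pos) tuples:
    tree = parse_with_external_tokens(grammar, convert_node, convert_leaf,
                                      error_recovery, my_tokenizer(source))

Because addtoken() now returns early (after a print) when classify() raises ParseError, an unrecognized token no longer aborts such an externally driven parse.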