From 427056a22d3b1a225eb80188bcb9df8a8b7bae58 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Wed, 26 Nov 2014 15:05:36 +0100 Subject: [PATCH] Change the pgen2 parser and its driver so that it can be accessed easily from the outside. This is a minor change and will allow Jedi's tokenizer to work with pgen2. --- jedi/parser/pgen2/__init__.py | 17 ++++++----------- jedi/parser/pgen2/parse.py | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/jedi/parser/pgen2/__init__.py b/jedi/parser/pgen2/__init__.py index 25e3fce1..05f71dfa 100644 --- a/jedi/parser/pgen2/__init__.py +++ b/jedi/parser/pgen2/__init__.py @@ -31,15 +31,16 @@ class Driver(object): self.error_recovery = error_recovery def parse_tokens(self, tokens): + p = parse.Parser(self.grammar, self.convert_node, self.convert_leaf, self.error_recovery) + return p.parse(self._tokenize(tokens)) + + def _tokenize(self, tokens): """Parse a series of tokens and return the syntax tree.""" # XXX Move the prefix computation into a wrapper around tokenize. - p = parse.Parser(self.grammar, self.convert_node, self.convert_leaf, self.error_recovery) lineno = 1 column = 0 - type = value = start = end = line_text = None prefix = "" - for quintuple in tokens: - type, value, start, end, line_text = quintuple + for type, value, start, end, line_text in tokens: if start != (lineno, column): assert (lineno, column) <= start, ((lineno, column), start) s_lineno, s_column = start @@ -60,18 +61,12 @@ class Driver(object): if type == token.OP: type = grammar.opmap[value] #self.logger.debug("%s %r (prefix=%r)", token.tok_name[type], value, prefix) - if p.addtoken(type, value, prefix, start): - break + yield type, value, prefix, start prefix = "" lineno, column = end if value.endswith("\n"): lineno += 1 column = 0 - else: - # We never broke out -- EOF is too soon (how can this happen???) 
- raise parse.ParseError("incomplete input", - type, value, (prefix, start)) - return p.rootnode def parse_string(self, text): """Parse a string and return the syntax tree.""" diff --git a/jedi/parser/pgen2/parse.py b/jedi/parser/pgen2/parse.py index 4a38b72e..aa95d16c 100644 --- a/jedi/parser/pgen2/parse.py +++ b/jedi/parser/pgen2/parse.py @@ -97,10 +97,26 @@ class Parser(object): self.rootnode = None self.error_recovery = error_recovery + def parse(self, tokens): + for type, value, prefix, start_pos in tokens: + if self.addtoken(type, value, prefix, start_pos): + break + else: + # We never broke out -- EOF is too soon (how can this happen???) + # Hint: It probably doesn't since there's an ENDMARKER. + raise ParseError("incomplete input", type, value, start_pos) + return self.rootnode + def addtoken(self, type, value, prefix, start_pos): """Add a token; return True iff this is the end of the program.""" # Map from token to label - ilabel = self.classify(type, value, start_pos) + try: + ilabel = self.classify(type, value, start_pos) + except ParseError: + # Currently we ignore tokens like `?`. + print('invalid token', token.tok_name[type], value) + return + # Loop until the token is shifted; may raise exceptions while True: dfa, state, node = self.stack[-1]