Change the pgen2 parser and its driver so that they can be accessed easily from the outside. This is a minor change and will allow Jedi's tokenizer to work with pgen2.

Dave Halter
2014-11-26 15:05:36 +01:00
parent cd1e07a532
commit 427056a22d
2 changed files with 23 additions and 12 deletions


@@ -31,15 +31,16 @@ class Driver(object):
         self.error_recovery = error_recovery
 
     def parse_tokens(self, tokens):
+        p = parse.Parser(self.grammar, self.convert_node, self.convert_leaf, self.error_recovery)
+        return p.parse(self._tokenize(tokens))
+
+    def _tokenize(self, tokens):
         """Parse a series of tokens and return the syntax tree."""
         # XXX Move the prefix computation into a wrapper around tokenize.
-        p = parse.Parser(self.grammar, self.convert_node, self.convert_leaf, self.error_recovery)
         lineno = 1
         column = 0
-        type = value = start = end = line_text = None
         prefix = ""
-        for quintuple in tokens:
-            type, value, start, end, line_text = quintuple
+        for type, value, start, end, line_text in tokens:
             if start != (lineno, column):
                 assert (lineno, column) <= start, ((lineno, column), start)
                 s_lineno, s_column = start
@@ -60,18 +61,12 @@ class Driver(object):
             if type == token.OP:
                 type = grammar.opmap[value]
             #self.logger.debug("%s %r (prefix=%r)", token.tok_name[type], value, prefix)
-            if p.addtoken(type, value, prefix, start):
-                break
+            yield type, value, prefix, start
             prefix = ""
             lineno, column = end
             if value.endswith("\n"):
                 lineno += 1
                 column = 0
-        else:
-            # We never broke out -- EOF is too soon (how can this happen???)
-            raise parse.ParseError("incomplete input",
-                                   type, value, (prefix, start))
-        return p.rootnode
 
     def parse_string(self, text):
         """Parse a string and return the syntax tree."""

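Taken together, the two hunks above split the old monolithic parse_tokens() into a thin entry point plus a generator: the driver no longer calls addtoken() itself, it only annotates tokens and hands them to the parser. Assembled from the added lines (with the prefix and line/column bookkeeping in the loop body elided), the new driver side reads roughly like this; the addtoken() loop it used to contain now lives in Parser.parse() in the second file below.

    # Inside class Driver (assembled from the hunks above; bookkeeping elided).
    def parse_tokens(self, tokens):
        p = parse.Parser(self.grammar, self.convert_node, self.convert_leaf,
                         self.error_recovery)
        return p.parse(self._tokenize(tokens))

    def _tokenize(self, tokens):
        # Generator: pairs each raw tokenizer quintuple with the prefix
        # (whitespace/comments) accumulated since the previous token.
        lineno = 1
        column = 0
        prefix = ""
        for type, value, start, end, line_text in tokens:
            # ... prefix and line/column bookkeeping as before ...
            yield type, value, prefix, start
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0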

@@ -97,10 +97,26 @@ class Parser(object):
         self.rootnode = None
         self.error_recovery = error_recovery
 
+    def parse(self, tokens):
+        for type, value, prefix, start_pos in tokens:
+            if self.addtoken(type, value, prefix, start_pos):
+                break
+        else:
+            # We never broke out -- EOF is too soon (how can this happen???)
+            # Hint: It probably doesn't since there's an ENDMARKER.
+            raise ParseError("incomplete input", type, value, start_pos)
+        return self.rootnode
+
     def addtoken(self, type, value, prefix, start_pos):
         """Add a token; return True iff this is the end of the program."""
         # Map from token to label
-        ilabel = self.classify(type, value, start_pos)
+        try:
+            ilabel = self.classify(type, value, start_pos)
+        except ParseError:
+            # Currently we ignore tokens like `?`.
+            print('invalid token', token.tok_name[type], value)
+            return
         # Loop until the token is shifted; may raise exceptions
         while True:
             dfa, state, node = self.stack[-1]
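With Parser.parse() public and classify() failures caught inside addtoken(), the parser can be driven from outside the driver: any iterable of (type, value, prefix, start_pos) tuples, for example one produced by Jedi's own tokenizer, can be fed to it directly, which is the point of the commit message above. A minimal sketch (the helper name and the surrounding setup are illustrative assumptions, not part of this commit):

    # Hypothetical helper, not from this commit: mirrors what the new
    # Driver.parse_tokens() does, but with a caller-supplied token stream
    # instead of Driver._tokenize().
    def parse_with_external_tokens(grammar, convert_node, convert_leaf,
                                   error_recovery, annotated_tokens):
        p = parse.Parser(grammar, convert_node, convert_leaf, error_recovery)
        return p.parse(annotated_tokens)

    # Usage, assuming a loaded pgen2 grammar, converter callbacks and a
    # tokenizer yielding (type, value, prefix, start_pos) tuples:
    tree = parse_with_external_tokens(grammar, convert_node, convert_leaf,
                                      error_recovery, my_tokenizer(source))

Because addtoken() now returns early (after a print) when classify() raises ParseError, an unrecognized token no longer aborts such an externally driven parse.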