Make some parsing work.

2026-05-19 23:10:16 +08:00 · 2017-08-24 01:39:17 +02:00
parent 033e880408
commit 3c3e7f5317
2 changed files with 35 additions and 13 deletions
@@ -199,21 +199,24 @@ class PythonGrammar(Grammar):
 class PythonFStringGrammar(Grammar):
    _token_namespace = fstring.TokenNamespace
    def __init__(self):
        super(PythonFStringGrammar, self).__init__(
            text=fstring.GRAMMAR,
-            tokenizer=fstring.tokenize
+            tokenizer=fstring.tokenize,
            parser=fstring.Parser
        )
    def parse(self, code, **kwargs):
        return self._parse(code, **kwargs)
    def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
-        tokens = self._tokenizer(lines)
+        tokens = self._tokenizer(code, start_pos=start_pos)
        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
-            start_symbol=start_symbol
+            start_symbol=fstring.START_SYMBOL,
        )
        return p.parse(tokens=tokens)
@@ -3,12 +3,13 @@ import re
 from parso.utils import PythonVersionInfo
 from parso.python.tokenize import Token
 from parso.python import token
 from parso import parser
 version36 = PythonVersionInfo(3, 6)
 class TokenNamespace:
-    LBRACE = token.LBRACE,
+    LBRACE = token.LBRACE
    RBRACE = token.RBRACE
    ENDMARKER = token.ENDMARKER
    ERRORTOKEN = token.ERRORTOKEN
@@ -25,18 +26,20 @@ class TokenNamespace:
            return cls.RBRACE
        elif string == '!':
            return cls.EXCLAMATION_MARK
        elif string == ':':
            return cls.COLON
        return getattr(cls, string)
 START_SYMBOL = 'fstring'
 GRAMMAR = """
-fstring: expressions ENDMARKER
+fstring: expression* ENDMARKER
-expressions: expression*
+expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ ':' expression* ] '}'
 expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ ':' expressions ] '}'
 """
 _prefix = r'((?:[^{}]+|\}\}|\{\{)*)'
 _expr = _prefix + r'(\{|\}|$)'
-_in_expr = r'[^{}\[\]:"\'!]*(.?)'
+_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
 # There's only one conversion character allowed. But the rules have to be
 # checked later anyway, so allow more here. This makes error recovery nicer.
 _conversion = r'([^={}:]+)(.?)'
@@ -46,13 +49,17 @@ _compiled_in_expr = re.compile(_in_expr)
 _compiled_conversion = re.compile(_conversion)
-def tokenize(code, start_pos=(1, 0)):
+def tokenize(*args, **kwargs):
    for t in _tokenize(*args, **kwargs):
        print(t)
        yield t
 def _tokenize(code, start_pos=(1, 0)):
    def tok(value, type=None, prefix=''):
        if type is None:
-            type = TokenNamespace.generate_token_id(found)
+            type = TokenNamespace.generate_token_id(value)
        line = column=1
        return Token(type, value, (line, column), prefix)
    code = ''
    start = 0
    while True:
        match = _compiled_expr.match(code, start)
@@ -74,7 +81,8 @@ def tokenize(code, start_pos=(1, 0)):
            curly_count = 0
            while True:
                expr_match = _compiled_in_expr.match(code, start)
-                expression += expr_match.group(0)
+                print(start, expr_match.group(1), expr_match.groups())
                expression += expr_match.group(1)
                found = expr_match.group(2)
                start = expr_match.end()
@@ -120,7 +128,7 @@ def tokenize(code, start_pos=(1, 0)):
                conversion_match = _compiled_conversion.match(code, start)
                found = conversion_match.group(2)
                start = conversion_match.end()
-                yield tok(conversion_match.group(1))
+                yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
                if found:
                    yield tok(found)
@@ -128,3 +136,14 @@ def tokenize(code, start_pos=(1, 0)):
            # basically new tokens.
    yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
 class Parser(parser.BaseParser):
    def parse(self, tokens):
        node = super(Parser, self).parse(tokens)
        if isinstance(node, self.default_leaf):  # Is an endmarker.
            # If there's no curly braces we get back a non-module. We always
            # want an fstring.
            node = self.default_node('fstring', [node])
        return node