Most fstrings issues should be fixed now.

2026-05-25 17:58:57 +08:00 · 2017-08-26 19:15:09 +02:00
parent 5b7a01ba62
commit 66606403c7
9 changed files with 72 additions and 21 deletions
@@ -66,14 +66,18 @@ class Grammar(object):
        :return: A syntax tree node. Typically the module.
        """
        if 'start_pos' in kwargs:
            raise TypeError("parse() got an unexpected keyworda argument.")
        return self._parse(code=code, **kwargs)
    def _parse(self, code=None, path=None, error_recovery=True,
               start_symbol='file_input', cache=False, diff_cache=False,
-               cache_path=None):
+               cache_path=None, start_pos=(1, 0)):
        """
        Python 3.5's bare ``*`` syntax for keyword-only arguments would have
        been preferable here. Since it is not available, everything is wrapped
        in this private method instead.
        start_pos is currently only used internally. It might become public
        sometime in the future.
        """
        if code is None and path is None:
            raise TypeError("Please provide either code or a path.")
@@ -121,7 +125,7 @@ class Grammar(object):
                            cache_path=cache_path)
                return new_node
-        tokens = self._tokenizer(lines)
+        tokens = self._tokenizer(lines, start_pos)
        p = self._parser(
            self._pgen_grammar,
@@ -190,8 +194,8 @@ class PythonGrammar(Grammar):
        )
        self.version_info = version_info
-    def _tokenize_lines(self, lines):
+    def _tokenize_lines(self, lines, start_pos):
-        return tokenize_lines(lines, self.version_info)
+        return tokenize_lines(lines, self.version_info, start_pos=start_pos)
    def _tokenize(self, code):
        # Used by Jedi.
@@ -25,9 +25,9 @@ class ParserSyntaxError(Exception):
    May be raised as an exception.
    """
-    def __init__(self, message, position):
+    def __init__(self, message, error_leaf):
        self.message = message
-        self.position = position
+        self.error_leaf = error_leaf
 class BaseParser(object):
@@ -60,7 +60,8 @@ class BaseParser(object):
        if self._error_recovery:
            raise NotImplementedError("Error Recovery is not implemented")
        else:
-            raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
+            error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
            raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
    def convert_node(self, pgen_grammar, type_, children):
        # TODO REMOVE symbol, we don't want type here.
@@ -6,6 +6,7 @@ from contextlib import contextmanager
 from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule
 from parso.python.tree import search_ancestor
 from parso.parser import ParserSyntaxError
 _BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt')
 _STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist')
@@ -847,6 +848,7 @@ class _FStringRule(SyntaxRule):
    message_unterminated_string = "f-string: unterminated string"  # f'{"}'
    message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
    message_incomplete = "f-string: expecting '}'"  # f'{'
    message_syntax = "invalid syntax"
    @classmethod
    def _load_grammar(cls):
@@ -883,12 +885,31 @@ class _FStringRule(SyntaxRule):
        if '#' in value:
            self.add_issue(python_expr, message=self.message_comment)
            return
        if re.match('\s*$', value) is not None:
            self.add_issue(python_expr, message=self.message_empty)
            return
        # This is now nested parsing. We parsed the fstring and now
        # we're parsing Python again.
-        module = self._normalizer.grammar.parse(value)
+        try:
-        parsed_expr = module.children[0]
+            # CPython has a somewhat special way of parsing Python code within
-        if parsed_expr.type == 'endmarker':
+            # f-strings. It wraps the code in brackets to make sure that
-            self.add_issue(python_expr, message=self.message_empty)
+            # whitespace doesn't cause problems (indentation/newlines).
            # Just use that algorithm as well here and adapt start positions.
            start_pos = python_expr.start_pos
            start_pos = start_pos[0], start_pos[1] - 1
            eval_input = self._normalizer.grammar._parse(
                '(%s)' % value,
                start_symbol='eval_input',
                start_pos=start_pos,
                error_recovery=False
            )
        except ParserSyntaxError as e:
            self.add_issue(e.error_leaf, message=self.message_syntax)
            return
        issues = self._normalizer.grammar.iter_errors(eval_input)
        self._normalizer.issues += issues
    def _check_format_spec(self, format_spec):
        for expression in format_spec.children[1:]:
@@ -154,7 +154,7 @@ def _tokenize(code, start_pos=(1, 0)):
                        )
                        start = len(code)
                        break
-                    expression += found + code[start:index]
+                    expression += found + code[start:index+1]
                    start = index + 1
                elif found == '!' and len(code) > start and code[start] == '=':
                    # This is a python `!=` and not a conversion.
@@ -11,6 +11,8 @@
 single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
 file_input: (NEWLINE | stmt)* ENDMARKER
 eval_input: testlist NEWLINE* ENDMARKER
 # Dave: A modification to parse f-strings.
 testlist_comp_with_endmarker: testlist NEWLINE* ENDMARKER
 decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
 decorators: decorator+
@@ -1,7 +1,6 @@
 from parso.python import tree
 from parso.python import tokenize
 from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
-                                STRING, tok_name)
+                                STRING, tok_name, NAME)
 from parso.parser import BaseParser
 from parso.pgen2.parse import token_to_ilabel
@@ -117,7 +116,7 @@ class Parser(BaseParser):
    def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
        # print('leaf', repr(value), token.tok_name[type])
-        if type == tokenize.NAME:
+        if type == NAME:
            if value in pgen_grammar.keywords:
                return tree.Keyword(value, start_pos, prefix)
            else:
@@ -247,7 +246,7 @@ class Parser(BaseParser):
    def _recovery_tokenize(self, tokens):
        for typ, value, start_pos, prefix in tokens:
-            # print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
+            # print(tok_name[typ], repr(value), start_pos, repr(prefix))
            if typ == DEDENT:
                # We need to count indents, because if we just omit any DEDENT,
                # we might omit them in the wrong place.
@@ -222,13 +222,13 @@ class PythonToken(Token):
                self._replace(type=self._get_type_name()))
-def tokenize(code, version_info):
+def tokenize(code, version_info, start_pos=(1, 0)):
    """Generate tokens from a the source code (string)."""
    lines = split_lines(code, keepends=True)
-    return tokenize_lines(lines, version_info)
+    return tokenize_lines(lines, version_info, start_pos=start_pos)
-def tokenize_lines(lines, version_info):
+def tokenize_lines(lines, version_info, start_pos=(1, 0)):
    """
    A heavily modified Python standard library tokenizer.
@@ -252,14 +252,22 @@ def tokenize_lines(lines, version_info):
    prefix = ''  # Should never be required, but here for safety
    additional_prefix = ''
    first = True
-    for lnum, line in enumerate(lines, 1):  # loop over lines in stream
+    lnum = start_pos[0] - 1
    for line in lines:  # loop over lines in stream
        lnum += 1
        pos, max = 0, len(line)
        if first:
            if line.startswith(BOM_UTF8_STRING):
                additional_prefix = BOM_UTF8_STRING
                line = line[1:]
            # Fake that the part before was already parsed.
            line = '^' * start_pos[1] + line
            pos = start_pos[1]
            max += start_pos[1]
            first = False
        pos, max = 0, len(line)
        if contstr:                                         # continued string
            endmatch = endprog.match(line)
            if endmatch:
@@ -150,6 +150,13 @@ FAILING_EXAMPLES = [
    "f'{'",
    "f'}'",
    "f'{\"}'",
    "f'{\"}'",
    # Now nested parsing
    "f'{1+}'",
    "f'{continue}'",
    "f'{1;1}'",
    "f'{a=3}'",
    "f'{b\"\" \"\"}'",
 ]
 GLOBAL_NONLOCAL_ERROR = [
@@ -237,3 +237,12 @@ def test_too_many_levels_of_indentation():
    base = 'def x():\n if x:\n'
    assert not _get_error_list(build_nested('pass', 49, base=base))
    assert _get_error_list(build_nested('pass', 50, base=base))
@pytest.mark.parametrize(
    'code', [
        "f'{*args,}'",
    ]
 )
 def test_valid_fstrings(code):
    assert not _get_error_list(code, version='3.6')