Most f-string issues should be fixed now.

Dave Halter
2017-08-26 19:15:09 +02:00
parent 5b7a01ba62
commit 66606403c7
9 changed files with 72 additions and 21 deletions

View File

@@ -66,14 +66,18 @@ class Grammar(object):
:return: A syntax tree node. Typically the module.
"""
if 'start_pos' in kwargs:
raise TypeError("parse() got an unexpected keyword argument.")
return self._parse(code=code, **kwargs)
def _parse(self, code=None, path=None, error_recovery=True,
start_symbol='file_input', cache=False, diff_cache=False,
cache_path=None):
cache_path=None, start_pos=(1, 0)):
"""
Wanted the Python 3.5 * operator and keyword-only arguments. Therefore just
wrap it all.
start_pos here is an internally used parameter. It might become public
sometime in the future.
"""
if code is None and path is None:
raise TypeError("Please provide either code or a path.")
@@ -121,7 +125,7 @@ class Grammar(object):
cache_path=cache_path)
return new_node
tokens = self._tokenizer(lines)
tokens = self._tokenizer(lines, start_pos)
p = self._parser(
self._pgen_grammar,
@@ -190,8 +194,8 @@ class PythonGrammar(Grammar):
)
self.version_info = version_info
def _tokenize_lines(self, lines):
return tokenize_lines(lines, self.version_info)
def _tokenize_lines(self, lines, start_pos):
return tokenize_lines(lines, self.version_info, start_pos=start_pos)
def _tokenize(self, code):
# Used by Jedi.
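
A minimal sketch of how the new start_pos threading behaves at this commit. parse() is the public entry point and deliberately rejects start_pos; _parse() is internal and may change, so this is illustration only:

```python
import parso

grammar = parso.load_grammar()

# The public parse() rejects start_pos on purpose.
try:
    grammar.parse("x = 1\n", start_pos=(5, 0))
except TypeError as exc:
    print(exc)  # parse() got an unexpected keyword argument.

# The internal _parse() threads start_pos through to the tokenizer, so the
# resulting tree is positioned as if the code started at line 5, column 0.
module = grammar._parse("x = 1\n", start_pos=(5, 0))
print(module.children[0].start_pos)  # expected: (5, 0)
```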

View File

@@ -25,9 +25,9 @@ class ParserSyntaxError(Exception):
May be raised as an exception.
"""
def __init__(self, message, position):
def __init__(self, message, error_leaf):
self.message = message
self.position = position
self.error_leaf = error_leaf
class BaseParser(object):
@@ -60,7 +60,8 @@ class BaseParser(object):
if self._error_recovery:
raise NotImplementedError("Error Recovery is not implemented")
else:
raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
def convert_node(self, pgen_grammar, type_, children):
# TODO REMOVE symbol, we don't want type here.
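
A hedged sketch of what the new exception payload looks like from the caller's side; error_recovery=False is forwarded through the public parse(), and the exact contents of the ErrorLeaf are an implementation detail:

```python
import parso
from parso.parser import ParserSyntaxError

grammar = parso.load_grammar()
try:
    grammar.parse("def broken(:\n", error_recovery=False)
except ParserSyntaxError as e:
    # Instead of a bare position, the exception now carries an ErrorLeaf,
    # which still knows its position but also the offending value and prefix.
    print(e.message)
    print(e.error_leaf.start_pos, repr(e.error_leaf.value))
```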

View File

@@ -6,6 +6,7 @@ from contextlib import contextmanager
from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule
from parso.python.tree import search_ancestor
from parso.parser import ParserSyntaxError
_BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt')
_STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist')
@@ -847,6 +848,7 @@ class _FStringRule(SyntaxRule):
message_unterminated_string = "f-string: unterminated string" # f'{"}'
message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
message_incomplete = "f-string: expecting '}'" # f'{'
message_syntax = "invalid syntax"
@classmethod
def _load_grammar(cls):
@@ -883,12 +885,31 @@ class _FStringRule(SyntaxRule):
if '#' in value:
self.add_issue(python_expr, message=self.message_comment)
return
if re.match('\s*$', value) is not None:
self.add_issue(python_expr, message=self.message_empty)
return
# This is now nested parsing. We parsed the fstring and now
# we're parsing Python again.
module = self._normalizer.grammar.parse(value)
parsed_expr = module.children[0]
if parsed_expr.type == 'endmarker':
self.add_issue(python_expr, message=self.message_empty)
try:
# CPython has a somewhat special way of parsing Python code within
# f-strings. It wraps the code in parentheses to make sure that
# whitespace doesn't cause problems (indentation/newlines).
# Use that algorithm here as well and adapt the start positions.
start_pos = python_expr.start_pos
start_pos = start_pos[0], start_pos[1] - 1
eval_input = self._normalizer.grammar._parse(
'(%s)' % value,
start_symbol='eval_input',
start_pos=start_pos,
error_recovery=False
)
except ParserSyntaxError as e:
self.add_issue(e.error_leaf, message=self.message_syntax)
return
issues = self._normalizer.grammar.iter_errors(eval_input)
self._normalizer.issues += issues
def _check_format_spec(self, format_spec):
for expression in format_spec.children[1:]:
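
A standalone sketch of the paren-wrapping trick used above; value and expr_start are hypothetical stand-ins for the expression text and its position inside an f-string, and _parse()/start_symbol are internal API at this commit:

```python
import parso
from parso.parser import ParserSyntaxError

grammar = parso.load_grammar()

value = "x +  y"        # hypothetical expression text pulled out of an f-string
expr_start = (3, 10)    # hypothetical position of that expression in the file

# Wrap the expression in parentheses so indentation/newlines inside it cannot
# confuse the parser, and shift the column left by one to compensate for the
# added '(' so reported positions still match the original file.
start_pos = expr_start[0], expr_start[1] - 1
try:
    eval_input = grammar._parse(
        '(%s)' % value,
        start_symbol='eval_input',
        start_pos=start_pos,
        error_recovery=False,
    )
except ParserSyntaxError as e:
    print("invalid syntax at", e.error_leaf.start_pos)
else:
    # Remaining issues are collected by the normal error normalizer.
    print(list(grammar.iter_errors(eval_input)))
```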

View File

@@ -154,7 +154,7 @@ def _tokenize(code, start_pos=(1, 0)):
)
start = len(code)
break
expression += found + code[start:index]
expression += found + code[start:index+1]
start = index + 1
elif found == '!' and len(code) > start and code[start] == '=':
# This is a python `!=` and not a conversion.
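
The change above fixes a classic inclusive/exclusive slicing off-by-one; a tiny standalone illustration (not the f-string tokenizer's actual data):

```python
code = "a!r} tail"
start = 0
index = code.index('}')

print(repr(code[start:index]))      # 'a!r'  -- the character at index is dropped
print(repr(code[start:index + 1]))  # 'a!r}' -- the fixed slice keeps it
```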

View File

@@ -11,6 +11,8 @@
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
# Dave: A modification to parse f-strings.
testlist_comp_with_endmarker: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+

View File

@@ -1,7 +1,6 @@
from parso.python import tree
from parso.python import tokenize
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, tok_name)
STRING, tok_name, NAME)
from parso.parser import BaseParser
from parso.pgen2.parse import token_to_ilabel
@@ -117,7 +116,7 @@ class Parser(BaseParser):
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
# print('leaf', repr(value), token.tok_name[type])
if type == tokenize.NAME:
if type == NAME:
if value in pgen_grammar.keywords:
return tree.Keyword(value, start_pos, prefix)
else:
@@ -247,7 +246,7 @@ class Parser(BaseParser):
def _recovery_tokenize(self, tokens):
for typ, value, start_pos, prefix in tokens:
# print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
# print(tok_name[typ], repr(value), start_pos, repr(prefix))
if typ == DEDENT:
# We need to count indents, because if we just omit any DEDENT,
# we might omit them in the wrong place.
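
For reference, the token constants and the tok_name mapping both live in parso.python.token, which is why the import swap and the updated debug comment line up; a quick check (a sketch, not part of the commit):

```python
from parso.python.token import NAME, ENDMARKER, tok_name

# tok_name maps numeric token types back to their names, which is what the
# commented-out debug print in _recovery_tokenize relies on.
print(NAME, tok_name[NAME])
print(ENDMARKER, tok_name[ENDMARKER])
```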

View File

@@ -222,13 +222,13 @@ class PythonToken(Token):
self._replace(type=self._get_type_name()))
def tokenize(code, version_info):
def tokenize(code, version_info, start_pos=(1, 0)):
"""Generate tokens from a the source code (string)."""
lines = split_lines(code, keepends=True)
return tokenize_lines(lines, version_info)
return tokenize_lines(lines, version_info, start_pos=start_pos)
def tokenize_lines(lines, version_info):
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
"""
A heavily modified Python standard library tokenizer.
@@ -252,14 +252,22 @@ def tokenize_lines(lines, version_info):
prefix = '' # Should never be required, but here for safety
additional_prefix = ''
first = True
for lnum, line in enumerate(lines, 1): # loop over lines in stream
lnum = start_pos[0] - 1
for line in lines: # loop over lines in stream
lnum += 1
pos, max = 0, len(line)
if first:
if line.startswith(BOM_UTF8_STRING):
additional_prefix = BOM_UTF8_STRING
line = line[1:]
# Fake that the part before was already parsed.
line = '^' * start_pos[1] + line
pos = start_pos[1]
max += start_pos[1]
first = False
pos, max = 0, len(line)
if contstr: # continued string
endmatch = endprog.match(line)
if endmatch:
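
A hedged usage sketch of the new start_pos parameter on the tokenizer at this commit: only the first line is padded with '^' placeholders internally, so the very first token already reports the offset column, and later lines keep their normal columns. grammar.version_info is reused here simply to get a valid version_info value:

```python
import parso
from parso.python.tokenize import tokenize

grammar = parso.load_grammar()

# Tokenize a snippet as if it started at line 10, column 4 of a larger file.
for typ, value, start_pos, prefix in tokenize("a + 1\n", grammar.version_info,
                                              start_pos=(10, 4)):
    print(typ, repr(value), start_pos)
# The first NAME token is expected at (10, 4); the trailing NEWLINE/ENDMARKER
# positions follow from there.
```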