diff --git a/parso/grammar.py b/parso/grammar.py
index 7af2064..0949280 100644
--- a/parso/grammar.py
+++ b/parso/grammar.py
@@ -66,14 +66,18 @@ class Grammar(object):
         :return: A syntax tree node. Typically the module.
         """
+        if 'start_pos' in kwargs:
+            raise TypeError("parse() got an unexpected keyword argument.")
         return self._parse(code=code, **kwargs)
 
     def _parse(self, code=None, path=None, error_recovery=True,
                start_symbol='file_input', cache=False, diff_cache=False,
-               cache_path=None):
+               cache_path=None, start_pos=(1, 0)):
         """
         Wanted python3.5 * operator and keyword only arguments. Therefore
         just wrap it all.
+        start_pos here is an internally used parameter. It might become
+        public sometime in the future.
         """
         if code is None and path is None:
             raise TypeError("Please provide either code or a path.")
 
@@ -121,7 +125,7 @@ class Grammar(object):
                         cache_path=cache_path)
                     return new_node
 
-        tokens = self._tokenizer(lines)
+        tokens = self._tokenizer(lines, start_pos)
 
         p = self._parser(
             self._pgen_grammar,
@@ -190,8 +194,8 @@ class PythonGrammar(Grammar):
         )
         self.version_info = version_info
 
-    def _tokenize_lines(self, lines):
-        return tokenize_lines(lines, self.version_info)
+    def _tokenize_lines(self, lines, start_pos):
+        return tokenize_lines(lines, self.version_info, start_pos=start_pos)
 
     def _tokenize(self, code):
         # Used by Jedi.
diff --git a/parso/parser.py b/parso/parser.py
index f1a7e9c..555ebc7 100644
--- a/parso/parser.py
+++ b/parso/parser.py
@@ -25,9 +25,9 @@ class ParserSyntaxError(Exception):
     May be raised as an exception.
     """
-    def __init__(self, message, position):
+    def __init__(self, message, error_leaf):
         self.message = message
-        self.position = position
+        self.error_leaf = error_leaf
 
 
 class BaseParser(object):
@@ -60,7 +60,8 @@ class BaseParser(object):
         if self._error_recovery:
             raise NotImplementedError("Error Recovery is not implemented")
         else:
-            raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
+            error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
+            raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
 
     def convert_node(self, pgen_grammar, type_, children):
         # TODO REMOVE symbol, we don't want type here.
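
With the two changes above, a failed parse surfaces the offending token itself instead of a bare (line, column) tuple, and internal callers can shift the whole parse by an offset. A minimal sketch of consuming the new payload; the snippet and grammar version are illustrative, not part of this change:

    import parso
    from parso.parser import ParserSyntaxError

    grammar = parso.load_grammar(version='3.6')
    try:
        # error_recovery=False makes the parser raise instead of
        # recovering with error nodes in the tree.
        grammar._parse('1 +', error_recovery=False)
    except ParserSyntaxError as e:
        # The exception now carries an ErrorLeaf, so both the bad
        # value and its position are available to the caller.
        print(e.message, e.error_leaf.start_pos)
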
diff --git a/parso/python/errors.py b/parso/python/errors.py
index c5eacd6..82e6878 100644
--- a/parso/python/errors.py
+++ b/parso/python/errors.py
@@ -6,6 +6,7 @@ from contextlib import contextmanager
 
 from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule
 from parso.python.tree import search_ancestor
+from parso.parser import ParserSyntaxError
 
 _BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt')
 _STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist')
@@ -847,6 +848,7 @@ class _FStringRule(SyntaxRule):
     message_unterminated_string = "f-string: unterminated string"  # f'{"}'
     message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
     message_incomplete = "f-string: expecting '}'"  # f'{'
+    message_syntax = "invalid syntax"
 
     @classmethod
     def _load_grammar(cls):
@@ -883,12 +885,31 @@ class _FStringRule(SyntaxRule):
         if '#' in value:
             self.add_issue(python_expr, message=self.message_comment)
             return
+        if re.match(r'\s*$', value) is not None:
+            self.add_issue(python_expr, message=self.message_empty)
+            return
+
         # This is now nested parsing. We parsed the f-string and now
         # we're parsing Python again.
-        module = self._normalizer.grammar.parse(value)
-        parsed_expr = module.children[0]
-        if parsed_expr.type == 'endmarker':
-            self.add_issue(python_expr, message=self.message_empty)
+        try:
+            # CPython has a somewhat special way to parse Python code within
+            # f-strings: it wraps the code in brackets to make sure that
+            # whitespace doesn't cause problems (indentation/newlines).
+            # Just use that algorithm here as well and adapt start positions.
+            start_pos = python_expr.start_pos
+            start_pos = start_pos[0], start_pos[1] - 1
+            eval_input = self._normalizer.grammar._parse(
+                '(%s)' % value,
+                start_symbol='eval_input',
+                start_pos=start_pos,
+                error_recovery=False
+            )
+        except ParserSyntaxError as e:
+            self.add_issue(e.error_leaf, message=self.message_syntax)
+            return
+
+        issues = self._normalizer.grammar.iter_errors(eval_input)
+        self._normalizer.issues += issues
 
     def _check_format_spec(self, format_spec):
         for expression in format_spec.children[1:]:
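
The nested-parsing branch above is the heart of this change: instead of parsing the expression as a whole module and only spotting empty expressions, the rule now reparses it as eval_input and forwards real syntax errors. A sketch of what it does for f'{1+}' — this mirrors the code above rather than adding any new API, and the positions are made up:

    import parso
    from parso.parser import ParserSyntaxError

    grammar = parso.load_grammar(version='3.6')
    value = '1+'        # the expression text inside f'{1+}'
    start_pos = (1, 3)  # made-up start of that expression
    try:
        # Wrap in brackets like CPython does and shift the column one to
        # the left, so error positions line up with the original f-string.
        grammar._parse('(%s)' % value,
                       start_symbol='eval_input',
                       start_pos=(start_pos[0], start_pos[1] - 1),
                       error_recovery=False)
    except ParserSyntaxError as e:
        print(e.error_leaf.start_pos)  # points into the f-string
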
diff --git a/parso/python/fstring.py b/parso/python/fstring.py
index 38208ca..28a960f 100644
--- a/parso/python/fstring.py
+++ b/parso/python/fstring.py
@@ -154,7 +154,7 @@ def _tokenize(code, start_pos=(1, 0)):
                 )
                 start = len(code)
                 break
-            expression += found + code[start:index]
+            expression += found + code[start:index+1]
             start = index + 1
         elif found == '!' and len(code) > start and code[start] == '=':
             # This is a python `!=` and not a conversion.
diff --git a/parso/python/grammar36.txt b/parso/python/grammar36.txt
index 4696983..70e9b3e 100644
--- a/parso/python/grammar36.txt
+++ b/parso/python/grammar36.txt
@@ -11,6 +11,8 @@
 single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
 file_input: (NEWLINE | stmt)* ENDMARKER
 eval_input: testlist NEWLINE* ENDMARKER
+# Dave: A modification to parse f-strings.
+testlist_comp_with_endmarker: testlist NEWLINE* ENDMARKER
 
 decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
 decorators: decorator+
diff --git a/parso/python/parser.py b/parso/python/parser.py
index 039d625..1897f53 100644
--- a/parso/python/parser.py
+++ b/parso/python/parser.py
@@ -1,7 +1,6 @@
 from parso.python import tree
-from parso.python import tokenize
 from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
-                                STRING, tok_name)
+                                STRING, tok_name, NAME)
 from parso.parser import BaseParser
 from parso.pgen2.parse import token_to_ilabel
 
@@ -117,7 +116,7 @@ class Parser(BaseParser):
     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
-        if type == tokenize.NAME:
+        if type == NAME:
             if value in pgen_grammar.keywords:
                 return tree.Keyword(value, start_pos, prefix)
             else:
@@ -247,7 +246,7 @@ class Parser(BaseParser):
 
     def _recovery_tokenize(self, tokens):
         for typ, value, start_pos, prefix in tokens:
-            # print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
+            # print(tok_name[typ], repr(value), start_pos, repr(prefix))
             if typ == DEDENT:
                 # We need to count indents, because if we just omit any DEDENT,
                 # we might omit them in the wrong place.
diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index 1316480..fa07aeb 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -222,13 +222,13 @@ class PythonToken(Token):
                 self._replace(type=self._get_type_name()))
 
 
-def tokenize(code, version_info):
+def tokenize(code, version_info, start_pos=(1, 0)):
     """Generate tokens from the source code (string)."""
     lines = split_lines(code, keepends=True)
-    return tokenize_lines(lines, version_info)
+    return tokenize_lines(lines, version_info, start_pos=start_pos)
 
 
-def tokenize_lines(lines, version_info):
+def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     """
     A heavily modified Python standard library tokenizer.
 
@@ -252,14 +252,22 @@ def tokenize_lines(lines, version_info):
     prefix = ''  # Should never be required, but here for safety
     additional_prefix = ''
     first = True
-    for lnum, line in enumerate(lines, 1):  # loop over lines in stream
+    lnum = start_pos[0] - 1
+    for line in lines:  # loop over lines in stream
+        lnum += 1
+        pos, max = 0, len(line)
         if first:
             if line.startswith(BOM_UTF8_STRING):
                 additional_prefix = BOM_UTF8_STRING
                 line = line[1:]
+
+            # Fake that the part before was already parsed.
+            line = '^' * start_pos[1] + line
+            pos = start_pos[1]
+            max += start_pos[1]
+
             first = False
 
-        pos, max = 0, len(line)
         if contstr:  # continued string
             endmatch = endprog.match(line)
             if endmatch:
diff --git a/test/failing_examples.py b/test/failing_examples.py
index e235a12..64d5cb2 100644
--- a/test/failing_examples.py
+++ b/test/failing_examples.py
@@ -150,6 +150,13 @@ FAILING_EXAMPLES = [
     "f'{'",
     "f'}'",
     "f'{\"}'",
+    "f'{\"}'",
+    # Now nested parsing
+    "f'{1+}'",
+    "f'{continue}'",
+    "f'{1;1}'",
+    "f'{a=3}'",
+    "f'{b\"\" \"\"}'",
 ]
 
 GLOBAL_NONLOCAL_ERROR = [
diff --git a/test/test_python_errors.py b/test/test_python_errors.py
index a3e2ac7..da68b8e 100644
--- a/test/test_python_errors.py
+++ b/test/test_python_errors.py
@@ -237,3 +237,12 @@ def test_too_many_levels_of_indentation():
     base = 'def x():\n if x:\n'
     assert not _get_error_list(build_nested('pass', 49, base=base))
     assert _get_error_list(build_nested('pass', 50, base=base))
+
+
+@pytest.mark.parametrize(
+    'code', [
+        "f'{*args,}'",
+    ]
+)
+def test_valid_fstrings(code):
+    assert not _get_error_list(code, version='3.6')
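
The start_pos plumbing in tokenize.py above can also be exercised directly. A rough sketch, assuming the internal tokenizer API as it appears in this diff and parso.utils.parse_version_string as the helper that turns '3.6' into a version_info; the coordinates are arbitrary. Every token comes out shifted as if the code started at line 5, column 8, which is exactly what the nested f-string parsing relies on:

    from parso.python.tokenize import tokenize
    from parso.utils import parse_version_string

    # The column offset applies only to the first line, matching the
    # '^' padding trick above; later lines shift by line number only.
    for typ, value, start_pos, prefix in tokenize(
            '1 + 2', parse_version_string('3.6'), start_pos=(5, 8)):
        print(typ, repr(value), start_pos)
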