mirror of
https://github.com/davidhalter/parso.git
synced 2026-05-18 06:20:03 +08:00
Most fstrings issues should be fixed now.
This commit is contained in:
+8
-4
@@ -66,14 +66,18 @@ class Grammar(object):
|
|||||||
|
|
||||||
:return: A syntax tree node. Typically the module.
|
:return: A syntax tree node. Typically the module.
|
||||||
"""
|
"""
|
||||||
|
if 'start_pos' in kwargs:
|
||||||
|
raise TypeError("parse() got an unexpected keyworda argument.")
|
||||||
return self._parse(code=code, **kwargs)
|
return self._parse(code=code, **kwargs)
|
||||||
|
|
||||||
def _parse(self, code=None, path=None, error_recovery=True,
|
def _parse(self, code=None, path=None, error_recovery=True,
|
||||||
start_symbol='file_input', cache=False, diff_cache=False,
|
start_symbol='file_input', cache=False, diff_cache=False,
|
||||||
cache_path=None):
|
cache_path=None, start_pos=(1, 0)):
|
||||||
"""
|
"""
|
||||||
Wanted python3.5 * operator and keyword only arguments. Therefore just
|
Wanted python3.5 * operator and keyword only arguments. Therefore just
|
||||||
wrap it all.
|
wrap it all.
|
||||||
|
start_pos here is just a parameter internally used. Might be public
|
||||||
|
sometime in the future.
|
||||||
"""
|
"""
|
||||||
if code is None and path is None:
|
if code is None and path is None:
|
||||||
raise TypeError("Please provide either code or a path.")
|
raise TypeError("Please provide either code or a path.")
|
||||||
@@ -121,7 +125,7 @@ class Grammar(object):
|
|||||||
cache_path=cache_path)
|
cache_path=cache_path)
|
||||||
return new_node
|
return new_node
|
||||||
|
|
||||||
tokens = self._tokenizer(lines)
|
tokens = self._tokenizer(lines, start_pos)
|
||||||
|
|
||||||
p = self._parser(
|
p = self._parser(
|
||||||
self._pgen_grammar,
|
self._pgen_grammar,
|
||||||
@@ -190,8 +194,8 @@ class PythonGrammar(Grammar):
|
|||||||
)
|
)
|
||||||
self.version_info = version_info
|
self.version_info = version_info
|
||||||
|
|
||||||
def _tokenize_lines(self, lines):
|
def _tokenize_lines(self, lines, start_pos):
|
||||||
return tokenize_lines(lines, self.version_info)
|
return tokenize_lines(lines, self.version_info, start_pos=start_pos)
|
||||||
|
|
||||||
def _tokenize(self, code):
|
def _tokenize(self, code):
|
||||||
# Used by Jedi.
|
# Used by Jedi.
|
||||||
|
|||||||
+4
-3
@@ -25,9 +25,9 @@ class ParserSyntaxError(Exception):
|
|||||||
|
|
||||||
May be raised as an exception.
|
May be raised as an exception.
|
||||||
"""
|
"""
|
||||||
def __init__(self, message, position):
|
def __init__(self, message, error_leaf):
|
||||||
self.message = message
|
self.message = message
|
||||||
self.position = position
|
self.error_leaf = error_leaf
|
||||||
|
|
||||||
|
|
||||||
class BaseParser(object):
|
class BaseParser(object):
|
||||||
@@ -60,7 +60,8 @@ class BaseParser(object):
|
|||||||
if self._error_recovery:
|
if self._error_recovery:
|
||||||
raise NotImplementedError("Error Recovery is not implemented")
|
raise NotImplementedError("Error Recovery is not implemented")
|
||||||
else:
|
else:
|
||||||
raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
|
error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
|
||||||
|
raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
|
||||||
|
|
||||||
def convert_node(self, pgen_grammar, type_, children):
|
def convert_node(self, pgen_grammar, type_, children):
|
||||||
# TODO REMOVE symbol, we don't want type here.
|
# TODO REMOVE symbol, we don't want type here.
|
||||||
|
|||||||
+25
-4
@@ -6,6 +6,7 @@ from contextlib import contextmanager
|
|||||||
|
|
||||||
from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule
|
from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule
|
||||||
from parso.python.tree import search_ancestor
|
from parso.python.tree import search_ancestor
|
||||||
|
from parso.parser import ParserSyntaxError
|
||||||
|
|
||||||
_BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt')
|
_BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt')
|
||||||
_STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist')
|
_STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist')
|
||||||
@@ -847,6 +848,7 @@ class _FStringRule(SyntaxRule):
|
|||||||
message_unterminated_string = "f-string: unterminated string" # f'{"}'
|
message_unterminated_string = "f-string: unterminated string" # f'{"}'
|
||||||
message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
|
message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
|
||||||
message_incomplete = "f-string: expecting '}'" # f'{'
|
message_incomplete = "f-string: expecting '}'" # f'{'
|
||||||
|
message_syntax = "invalid syntax"
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _load_grammar(cls):
|
def _load_grammar(cls):
|
||||||
@@ -883,12 +885,31 @@ class _FStringRule(SyntaxRule):
|
|||||||
if '#' in value:
|
if '#' in value:
|
||||||
self.add_issue(python_expr, message=self.message_comment)
|
self.add_issue(python_expr, message=self.message_comment)
|
||||||
return
|
return
|
||||||
|
if re.match('\s*$', value) is not None:
|
||||||
|
self.add_issue(python_expr, message=self.message_empty)
|
||||||
|
return
|
||||||
|
|
||||||
# This is now nested parsing. We parsed the fstring and now
|
# This is now nested parsing. We parsed the fstring and now
|
||||||
# we're parsing Python again.
|
# we're parsing Python again.
|
||||||
module = self._normalizer.grammar.parse(value)
|
try:
|
||||||
parsed_expr = module.children[0]
|
# CPython has a somewhat special way to parse Python code within
|
||||||
if parsed_expr.type == 'endmarker':
|
# f-strings. It wraps the code in brackets to make sure that
|
||||||
self.add_issue(python_expr, message=self.message_empty)
|
# whitespace doesn't cause problems (indentation/newlines).
|
||||||
|
# Just use that algorithm as well here and adapt start positions.
|
||||||
|
start_pos = python_expr.start_pos
|
||||||
|
start_pos = start_pos[0], start_pos[1] - 1
|
||||||
|
eval_input = self._normalizer.grammar._parse(
|
||||||
|
'(%s)' % value,
|
||||||
|
start_symbol='eval_input',
|
||||||
|
start_pos=start_pos,
|
||||||
|
error_recovery=False
|
||||||
|
)
|
||||||
|
except ParserSyntaxError as e:
|
||||||
|
self.add_issue(e.error_leaf, message=self.message_syntax)
|
||||||
|
return
|
||||||
|
|
||||||
|
issues = self._normalizer.grammar.iter_errors(eval_input)
|
||||||
|
self._normalizer.issues += issues
|
||||||
|
|
||||||
def _check_format_spec(self, format_spec):
|
def _check_format_spec(self, format_spec):
|
||||||
for expression in format_spec.children[1:]:
|
for expression in format_spec.children[1:]:
|
||||||
|
|||||||
@@ -154,7 +154,7 @@ def _tokenize(code, start_pos=(1, 0)):
|
|||||||
)
|
)
|
||||||
start = len(code)
|
start = len(code)
|
||||||
break
|
break
|
||||||
expression += found + code[start:index]
|
expression += found + code[start:index+1]
|
||||||
start = index + 1
|
start = index + 1
|
||||||
elif found == '!' and len(code) > start and code[start] == '=':
|
elif found == '!' and len(code) > start and code[start] == '=':
|
||||||
# This is a python `!=` and not a conversion.
|
# This is a python `!=` and not a conversion.
|
||||||
|
|||||||
@@ -11,6 +11,8 @@
|
|||||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||||
file_input: (NEWLINE | stmt)* ENDMARKER
|
file_input: (NEWLINE | stmt)* ENDMARKER
|
||||||
eval_input: testlist NEWLINE* ENDMARKER
|
eval_input: testlist NEWLINE* ENDMARKER
|
||||||
|
# Dave: A modification to parse f-strings.
|
||||||
|
testlist_comp_with_endmarker: testlist NEWLINE* ENDMARKER
|
||||||
|
|
||||||
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||||
decorators: decorator+
|
decorators: decorator+
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
from parso.python import tree
|
from parso.python import tree
|
||||||
from parso.python import tokenize
|
|
||||||
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
|
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
|
||||||
STRING, tok_name)
|
STRING, tok_name, NAME)
|
||||||
from parso.parser import BaseParser
|
from parso.parser import BaseParser
|
||||||
from parso.pgen2.parse import token_to_ilabel
|
from parso.pgen2.parse import token_to_ilabel
|
||||||
|
|
||||||
@@ -117,7 +116,7 @@ class Parser(BaseParser):
|
|||||||
|
|
||||||
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
|
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
|
||||||
# print('leaf', repr(value), token.tok_name[type])
|
# print('leaf', repr(value), token.tok_name[type])
|
||||||
if type == tokenize.NAME:
|
if type == NAME:
|
||||||
if value in pgen_grammar.keywords:
|
if value in pgen_grammar.keywords:
|
||||||
return tree.Keyword(value, start_pos, prefix)
|
return tree.Keyword(value, start_pos, prefix)
|
||||||
else:
|
else:
|
||||||
@@ -247,7 +246,7 @@ class Parser(BaseParser):
|
|||||||
|
|
||||||
def _recovery_tokenize(self, tokens):
|
def _recovery_tokenize(self, tokens):
|
||||||
for typ, value, start_pos, prefix in tokens:
|
for typ, value, start_pos, prefix in tokens:
|
||||||
# print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
|
# print(tok_name[typ], repr(value), start_pos, repr(prefix))
|
||||||
if typ == DEDENT:
|
if typ == DEDENT:
|
||||||
# We need to count indents, because if we just omit any DEDENT,
|
# We need to count indents, because if we just omit any DEDENT,
|
||||||
# we might omit them in the wrong place.
|
# we might omit them in the wrong place.
|
||||||
|
|||||||
@@ -222,13 +222,13 @@ class PythonToken(Token):
|
|||||||
self._replace(type=self._get_type_name()))
|
self._replace(type=self._get_type_name()))
|
||||||
|
|
||||||
|
|
||||||
def tokenize(code, version_info):
|
def tokenize(code, version_info, start_pos=(1, 0)):
|
||||||
"""Generate tokens from the source code (string)."""
|
"""Generate tokens from the source code (string)."""
|
||||||
lines = split_lines(code, keepends=True)
|
lines = split_lines(code, keepends=True)
|
||||||
return tokenize_lines(lines, version_info)
|
return tokenize_lines(lines, version_info, start_pos=start_pos)
|
||||||
|
|
||||||
|
|
||||||
def tokenize_lines(lines, version_info):
|
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||||
"""
|
"""
|
||||||
A heavily modified Python standard library tokenizer.
|
A heavily modified Python standard library tokenizer.
|
||||||
|
|
||||||
@@ -252,14 +252,22 @@ def tokenize_lines(lines, version_info):
|
|||||||
prefix = '' # Should never be required, but here for safety
|
prefix = '' # Should never be required, but here for safety
|
||||||
additional_prefix = ''
|
additional_prefix = ''
|
||||||
first = True
|
first = True
|
||||||
for lnum, line in enumerate(lines, 1): # loop over lines in stream
|
lnum = start_pos[0] - 1
|
||||||
|
for line in lines: # loop over lines in stream
|
||||||
|
lnum += 1
|
||||||
|
pos, max = 0, len(line)
|
||||||
if first:
|
if first:
|
||||||
if line.startswith(BOM_UTF8_STRING):
|
if line.startswith(BOM_UTF8_STRING):
|
||||||
additional_prefix = BOM_UTF8_STRING
|
additional_prefix = BOM_UTF8_STRING
|
||||||
line = line[1:]
|
line = line[1:]
|
||||||
|
|
||||||
|
# Fake that the part before was already parsed.
|
||||||
|
line = '^' * start_pos[1] + line
|
||||||
|
pos = start_pos[1]
|
||||||
|
max += start_pos[1]
|
||||||
|
|
||||||
first = False
|
first = False
|
||||||
|
|
||||||
pos, max = 0, len(line)
|
|
||||||
if contstr: # continued string
|
if contstr: # continued string
|
||||||
endmatch = endprog.match(line)
|
endmatch = endprog.match(line)
|
||||||
if endmatch:
|
if endmatch:
|
||||||
|
|||||||
@@ -150,6 +150,13 @@ FAILING_EXAMPLES = [
|
|||||||
"f'{'",
|
"f'{'",
|
||||||
"f'}'",
|
"f'}'",
|
||||||
"f'{\"}'",
|
"f'{\"}'",
|
||||||
|
"f'{\"}'",
|
||||||
|
# Now nested parsing
|
||||||
|
"f'{1+}'",
|
||||||
|
"f'{continue}'",
|
||||||
|
"f'{1;1}'",
|
||||||
|
"f'{a=3}'",
|
||||||
|
"f'{b\"\" \"\"}'",
|
||||||
]
|
]
|
||||||
|
|
||||||
GLOBAL_NONLOCAL_ERROR = [
|
GLOBAL_NONLOCAL_ERROR = [
|
||||||
|
|||||||
@@ -237,3 +237,12 @@ def test_too_many_levels_of_indentation():
|
|||||||
base = 'def x():\n if x:\n'
|
base = 'def x():\n if x:\n'
|
||||||
assert not _get_error_list(build_nested('pass', 49, base=base))
|
assert not _get_error_list(build_nested('pass', 49, base=base))
|
||||||
assert _get_error_list(build_nested('pass', 50, base=base))
|
assert _get_error_list(build_nested('pass', 50, base=base))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
'code', [
|
||||||
|
"f'{*args,}'",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
def test_valid_fstrings(code):
|
||||||
|
assert not _get_error_list(code, version='3.6')
|
||||||
|
|||||||
Reference in New Issue
Block a user