Most f-string issues should be fixed now.

Dave Halter
2017-08-26 19:15:09 +02:00
parent 5b7a01ba62
commit 66606403c7
9 changed files with 72 additions and 21 deletions

View File

@@ -66,14 +66,18 @@ class Grammar(object):
:return: A syntax tree node. Typically the module.
"""
if 'start_pos' in kwargs:
raise TypeError("parse() got an unexpected keyword argument.")
return self._parse(code=code, **kwargs)
def _parse(self, code=None, path=None, error_recovery=True,
start_symbol='file_input', cache=False, diff_cache=False,
cache_path=None):
cache_path=None, start_pos=(1, 0)):
"""
Wanted the Python 3.5 * operator and keyword-only arguments. Therefore just
wrap it all.
start_pos here is an internally used parameter. It might become public
sometime in the future.
"""
if code is None and path is None:
raise TypeError("Please provide either code or a path.")
@@ -121,7 +125,7 @@ class Grammar(object):
cache_path=cache_path)
return new_node
tokens = self._tokenizer(lines)
tokens = self._tokenizer(lines, start_pos)
p = self._parser(
self._pgen_grammar,
@@ -190,8 +194,8 @@ class PythonGrammar(Grammar):
)
self.version_info = version_info
def _tokenize_lines(self, lines):
return tokenize_lines(lines, self.version_info)
def _tokenize_lines(self, lines, start_pos):
return tokenize_lines(lines, self.version_info, start_pos=start_pos)
def _tokenize(self, code):
# Used by Jedi.
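
A minimal sketch of how the new start_pos threading behaves at this commit. parse() is the public entry point and deliberately rejects start_pos; _parse() is internal and may change, so this is illustration only:

```python
import parso

grammar = parso.load_grammar()

# The public parse() rejects start_pos on purpose.
try:
    grammar.parse("x = 1\n", start_pos=(5, 0))
except TypeError as exc:
    print(exc)  # parse() got an unexpected keyword argument.

# The internal _parse() threads start_pos through to the tokenizer, so the
# resulting tree is positioned as if the code started at line 5, column 0.
module = grammar._parse("x = 1\n", start_pos=(5, 0))
print(module.children[0].start_pos)  # expected: (5, 0)
```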

View File

@@ -25,9 +25,9 @@ class ParserSyntaxError(Exception):
May be raised as an exception.
"""
def __init__(self, message, position):
def __init__(self, message, error_leaf):
self.message = message
self.position = position
self.error_leaf = error_leaf
class BaseParser(object):
@@ -60,7 +60,8 @@ class BaseParser(object):
if self._error_recovery:
raise NotImplementedError("Error Recovery is not implemented")
else:
raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
def convert_node(self, pgen_grammar, type_, children):
# TODO REMOVE symbol, we don't want type here.
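
A hedged sketch of what the new exception payload looks like from the caller's side; error_recovery=False is forwarded through the public parse(), and the exact contents of the ErrorLeaf are an implementation detail:

```python
import parso
from parso.parser import ParserSyntaxError

grammar = parso.load_grammar()
try:
    grammar.parse("def broken(:\n", error_recovery=False)
except ParserSyntaxError as e:
    # Instead of a bare position, the exception now carries an ErrorLeaf,
    # which still knows its position but also the offending value and prefix.
    print(e.message)
    print(e.error_leaf.start_pos, repr(e.error_leaf.value))
```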

View File

@@ -6,6 +6,7 @@ from contextlib import contextmanager
from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule
from parso.python.tree import search_ancestor
from parso.parser import ParserSyntaxError
_BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt')
_STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist')
@@ -847,6 +848,7 @@ class _FStringRule(SyntaxRule):
message_unterminated_string = "f-string: unterminated string" # f'{"}'
message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
message_incomplete = "f-string: expecting '}'" # f'{'
message_syntax = "invalid syntax"
@classmethod
def _load_grammar(cls):
@@ -883,12 +885,31 @@ class _FStringRule(SyntaxRule):
if '#' in value:
self.add_issue(python_expr, message=self.message_comment)
return
if re.match('\s*$', value) is not None:
self.add_issue(python_expr, message=self.message_empty)
return
# This is now nested parsing. We parsed the fstring and now
# we're parsing Python again.
module = self._normalizer.grammar.parse(value)
parsed_expr = module.children[0]
if parsed_expr.type == 'endmarker':
self.add_issue(python_expr, message=self.message_empty)
try:
# CPython has a somewhat special way of parsing Python code within
# f-strings. It wraps the code in parentheses to make sure that
# whitespace doesn't cause problems (indentation/newlines).
# Use that algorithm here as well and adapt the start positions.
start_pos = python_expr.start_pos
start_pos = start_pos[0], start_pos[1] - 1
eval_input = self._normalizer.grammar._parse(
'(%s)' % value,
start_symbol='eval_input',
start_pos=start_pos,
error_recovery=False
)
except ParserSyntaxError as e:
self.add_issue(e.error_leaf, message=self.message_syntax)
return
issues = self._normalizer.grammar.iter_errors(eval_input)
self._normalizer.issues += issues
def _check_format_spec(self, format_spec):
for expression in format_spec.children[1:]:
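
A standalone sketch of the paren-wrapping trick used above; value and expr_start are hypothetical stand-ins for the expression text and its position inside an f-string, and _parse()/start_symbol are internal API at this commit:

```python
import parso
from parso.parser import ParserSyntaxError

grammar = parso.load_grammar()

value = "x +  y"        # hypothetical expression text pulled out of an f-string
expr_start = (3, 10)    # hypothetical position of that expression in the file

# Wrap the expression in parentheses so indentation/newlines inside it cannot
# confuse the parser, and shift the column left by one to compensate for the
# added '(' so reported positions still match the original file.
start_pos = expr_start[0], expr_start[1] - 1
try:
    eval_input = grammar._parse(
        '(%s)' % value,
        start_symbol='eval_input',
        start_pos=start_pos,
        error_recovery=False,
    )
except ParserSyntaxError as e:
    print("invalid syntax at", e.error_leaf.start_pos)
else:
    # Remaining issues are collected by the normal error normalizer.
    print(list(grammar.iter_errors(eval_input)))
```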

View File

@@ -154,7 +154,7 @@ def _tokenize(code, start_pos=(1, 0)):
)
start = len(code)
break
expression += found + code[start:index]
expression += found + code[start:index+1]
start = index + 1
elif found == '!' and len(code) > start and code[start] == '=':
# This is a python `!=` and not a conversion.
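
The change above fixes a classic inclusive/exclusive slicing off-by-one; a tiny standalone illustration (not the f-string tokenizer's actual data):

```python
code = "a!r} tail"
start = 0
index = code.index('}')

print(repr(code[start:index]))      # 'a!r'  -- the character at index is dropped
print(repr(code[start:index + 1]))  # 'a!r}' -- the fixed slice keeps it
```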

View File

@@ -11,6 +11,8 @@
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
# Dave: A modification to parse f-strings.
testlist_comp_with_endmarker: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+

View File

@@ -1,7 +1,6 @@
from parso.python import tree
from parso.python import tokenize
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
STRING, tok_name)
STRING, tok_name, NAME)
from parso.parser import BaseParser
from parso.pgen2.parse import token_to_ilabel
@@ -117,7 +116,7 @@ class Parser(BaseParser):
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
# print('leaf', repr(value), token.tok_name[type])
if type == tokenize.NAME:
if type == NAME:
if value in pgen_grammar.keywords:
return tree.Keyword(value, start_pos, prefix)
else:
@@ -247,7 +246,7 @@ class Parser(BaseParser):
def _recovery_tokenize(self, tokens):
for typ, value, start_pos, prefix in tokens:
# print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
# print(tok_name[typ], repr(value), start_pos, repr(prefix))
if typ == DEDENT:
# We need to count indents, because if we just omit any DEDENT,
# we might omit them in the wrong place.
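
For reference, the token constants and the tok_name mapping both live in parso.python.token, which is why the import swap and the updated debug comment line up; a quick check (a sketch, not part of the commit):

```python
from parso.python.token import NAME, ENDMARKER, tok_name

# tok_name maps numeric token types back to their names, which is what the
# commented-out debug print in _recovery_tokenize relies on.
print(NAME, tok_name[NAME])
print(ENDMARKER, tok_name[ENDMARKER])
```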

View File

@@ -222,13 +222,13 @@ class PythonToken(Token):
self._replace(type=self._get_type_name()))
def tokenize(code, version_info):
def tokenize(code, version_info, start_pos=(1, 0)):
"""Generate tokens from a the source code (string)."""
lines = split_lines(code, keepends=True)
return tokenize_lines(lines, version_info)
return tokenize_lines(lines, version_info, start_pos=start_pos)
def tokenize_lines(lines, version_info):
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
"""
A heavily modified Python standard library tokenizer.
@@ -252,14 +252,22 @@ def tokenize_lines(lines, version_info):
prefix = '' # Should never be required, but here for safety
additional_prefix = ''
first = True
for lnum, line in enumerate(lines, 1): # loop over lines in stream
lnum = start_pos[0] - 1
for line in lines: # loop over lines in stream
lnum += 1
pos, max = 0, len(line)
if first:
if line.startswith(BOM_UTF8_STRING):
additional_prefix = BOM_UTF8_STRING
line = line[1:]
# Fake that the part before was already parsed.
line = '^' * start_pos[1] + line
pos = start_pos[1]
max += start_pos[1]
first = False
pos, max = 0, len(line)
if contstr: # continued string
endmatch = endprog.match(line)
if endmatch:
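
A hedged usage sketch of the new start_pos parameter on the tokenizer at this commit: only the first line is padded with '^' placeholders internally, so the very first token already reports the offset column, and later lines keep their normal columns. grammar.version_info is reused here simply to get a valid version_info value:

```python
import parso
from parso.python.tokenize import tokenize

grammar = parso.load_grammar()

# Tokenize a snippet as if it started at line 10, column 4 of a larger file.
for typ, value, start_pos, prefix in tokenize("a + 1\n", grammar.version_info,
                                              start_pos=(10, 4)):
    print(typ, repr(value), start_pos)
# The first NAME token is expected at (10, 4); the trailing NEWLINE/ENDMARKER
# positions follow from there.
```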