mirror of https://github.com/davidhalter/parso.git (synced 2026-01-28 05:42:30 +08:00)
Most fstrings issues should be fixed now.
@@ -66,14 +66,18 @@ class Grammar(object):
         :return: A syntax tree node. Typically the module.
         """
+        if 'start_pos' in kwargs:
+            raise TypeError("parse() got an unexpected keyword argument.")
         return self._parse(code=code, **kwargs)
 
     def _parse(self, code=None, path=None, error_recovery=True,
                start_symbol='file_input', cache=False, diff_cache=False,
-               cache_path=None):
+               cache_path=None, start_pos=(1, 0)):
         """
         Wanted python3.5 * operator and keyword only arguments. Therefore just
         wrap it all.
+        start_pos here is just a parameter internally used. Might be public
+        sometime in the future.
         """
         if code is None and path is None:
             raise TypeError("Please provide either code or a path.")

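The core of this hunk is the new start_pos parameter on _parse(). A minimal sketch of what it enables, assuming this commit's internal API (_parse() is private, and the public parse() now rejects start_pos explicitly):

import parso

grammar = parso.load_grammar()
# Parse a fragment as if it began at line 5, column 10 instead of (1, 0).
# _parse() is internal; its signature is taken from the hunk above.
node = grammar._parse('a + b', start_symbol='eval_input',
                      start_pos=(5, 10), error_recovery=False)
print(node.start_pos)  # expected to be (5, 10) rather than (1, 0)
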
@@ -121,7 +125,7 @@ class Grammar(object):
                 cache_path=cache_path)
             return new_node
 
-        tokens = self._tokenizer(lines)
+        tokens = self._tokenizer(lines, start_pos)
 
         p = self._parser(
             self._pgen_grammar,

@@ -190,8 +194,8 @@ class PythonGrammar(Grammar):
         )
         self.version_info = version_info
 
-    def _tokenize_lines(self, lines):
-        return tokenize_lines(lines, self.version_info)
+    def _tokenize_lines(self, lines, start_pos):
+        return tokenize_lines(lines, self.version_info, start_pos=start_pos)
 
     def _tokenize(self, code):
         # Used by Jedi.

@@ -25,9 +25,9 @@ class ParserSyntaxError(Exception):
 
     May be raised as an exception.
     """
-    def __init__(self, message, position):
+    def __init__(self, message, error_leaf):
         self.message = message
-        self.position = position
+        self.error_leaf = error_leaf
 
 
 class BaseParser(object):

@@ -60,7 +60,8 @@ class BaseParser(object):
         if self._error_recovery:
             raise NotImplementedError("Error Recovery is not implemented")
         else:
-            raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
+            error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
+            raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
 
     def convert_node(self, pgen_grammar, type_, children):
         # TODO REMOVE symbol, we don't want type here.

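After these two hunks, a failed parse with error recovery disabled carries the offending ErrorLeaf rather than a bare position, so callers also get the token's value and prefix. A hedged sketch of consuming the new attribute:

import parso
from parso.parser import ParserSyntaxError

grammar = parso.load_grammar()
try:
    # Invalid syntax with error_recovery=False triggers the raise above.
    grammar._parse('def def', error_recovery=False)
except ParserSyntaxError as e:
    # e.error_leaf replaces the old e.position attribute.
    print(e.message, e.error_leaf.start_pos, repr(e.error_leaf.value))
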
@@ -6,6 +6,7 @@ from contextlib import contextmanager
 
 from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule
 from parso.python.tree import search_ancestor
+from parso.parser import ParserSyntaxError
 
 _BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt')
 _STAR_EXPR_PARENTS = ('testlist_star_expr', 'testlist_comp', 'exprlist')

@@ -847,6 +848,7 @@ class _FStringRule(SyntaxRule):
     message_unterminated_string = "f-string: unterminated string"  # f'{"}'
     message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
     message_incomplete = "f-string: expecting '}'"  # f'{'
+    message_syntax = "invalid syntax"
 
     @classmethod
     def _load_grammar(cls):

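The new message_syntax is what gets reported when the nested parse of an f-string expression fails. A quick way to watch these rules fire, assuming a 3.6 grammar (the first version with f-strings):

import parso

grammar = parso.load_grammar(version='3.6')
module = grammar.parse("f'{1 +}'")
for issue in grammar.iter_errors(module):
    # Expected to include the new "invalid syntax" message.
    print(issue.start_pos, issue.message)
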
@@ -883,12 +885,31 @@ class _FStringRule(SyntaxRule):
         if '#' in value:
             self.add_issue(python_expr, message=self.message_comment)
             return
         if re.match('\s*$', value) is not None:
             self.add_issue(python_expr, message=self.message_empty)
             return
 
-        # This is now nested parsing. We parsed the fstring and now
-        # we're parsing Python again.
-        module = self._normalizer.grammar.parse(value)
-        parsed_expr = module.children[0]
-        if parsed_expr.type == 'endmarker':
-            self.add_issue(python_expr, message=self.message_empty)
+        try:
+            # CPython has a bit of a special way to parse Python code within
+            # f-strings. It wraps the code in brackets to make sure that
+            # whitespace doesn't make problems (indentation/newlines).
+            # Just use that algorithm as well here and adapt start positions.
+            start_pos = python_expr.start_pos
+            start_pos = start_pos[0], start_pos[1] - 1
+            eval_input = self._normalizer.grammar._parse(
+                '(%s)' % value,
+                start_symbol='eval_input',
+                start_pos=start_pos,
+                error_recovery=False
+            )
+        except ParserSyntaxError as e:
+            self.add_issue(e.error_leaf, message=self.message_syntax)
+            return
+
+        issues = self._normalizer.grammar.iter_errors(eval_input)
+        self._normalizer.issues += issues
 
     def _check_format_spec(self, format_spec):
         for expression in format_spec.children[1:]:

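The column shift of -1 compensates for the opening bracket: '(%s)' % value puts one character in front of the expression, so starting the nested parse one column to the left makes every position inside value line up with its real position in the f-string. A small illustration with hypothetical numbers:

# Suppose the expression text starts at column 3 of the f-string's line.
value = '1 +'
real_column = 3
wrapped = '(%s)' % value        # '(1 +)' - one extra char in front
nested_start = real_column - 1  # parse as if starting one column earlier
# Character i of value sits at column nested_start + 1 + i == real_column + i,
# so error positions from the nested parse match the original source.
assert nested_start + 1 == real_column
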
@@ -154,7 +154,7 @@ def _tokenize(code, start_pos=(1, 0)):
                 )
                 start = len(code)
                 break
-            expression += found + code[start:index]
+            expression += found + code[start:index+1]
             start = index + 1
         elif found == '!' and len(code) > start and code[start] == '=':
             # This is a python `!=` and not a conversion.

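The elif is what separates a conversion marker from a comparison: in f'{a!r}' the ! introduces a conversion character ('s', 'r', or 'a'), while in f'{a != b}' it is just the first half of !=. Plain Python for contrast:

a, b = 1, 2
print(f'{a!r}')     # conversion: repr(a)  -> '1'
print(f'{a != b}')  # comparison: a != b   -> 'True'
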
@@ -11,6 +11,8 @@
 single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
 file_input: (NEWLINE | stmt)* ENDMARKER
 eval_input: testlist NEWLINE* ENDMARKER
+# Dave: A modification to parse f-strings.
+testlist_comp_with_endmarker: testlist NEWLINE* ENDMARKER
 
 decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
 decorators: decorator+

@@ -1,7 +1,6 @@
 from parso.python import tree
-from parso.python import tokenize
 from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
-                                STRING, tok_name)
+                                STRING, tok_name, NAME)
 from parso.parser import BaseParser
 from parso.pgen2.parse import token_to_ilabel

@@ -117,7 +116,7 @@ class Parser(BaseParser):
 
     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
-        if type == tokenize.NAME:
+        if type == NAME:
             if value in pgen_grammar.keywords:
                 return tree.Keyword(value, start_pos, prefix)
             else:

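convert_leaf() is where a NAME token becomes either a Keyword or a Name leaf, depending on the pgen grammar's keyword set. The effect is visible through the public API:

import parso

module = parso.parse('if x:\n    pass\n')
if_stmt = module.children[0]
print(if_stmt.children[0].type)  # 'keyword' - `if` is in pgen_grammar.keywords
print(if_stmt.children[1].type)  # 'name'    - `x` is an ordinary identifier
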
@@ -247,7 +246,7 @@ class Parser(BaseParser):
 
     def _recovery_tokenize(self, tokens):
         for typ, value, start_pos, prefix in tokens:
-            # print(tokenize.tok_name[typ], repr(value), start_pos, repr(prefix))
+            # print(tok_name[typ], repr(value), start_pos, repr(prefix))
             if typ == DEDENT:
                 # We need to count indents, because if we just omit any DEDENT,
                 # we might omit them in the wrong place.

@@ -222,13 +222,13 @@ class PythonToken(Token):
                 self._replace(type=self._get_type_name()))
 
 
-def tokenize(code, version_info):
+def tokenize(code, version_info, start_pos=(1, 0)):
     """Generate tokens from the source code (string)."""
     lines = split_lines(code, keepends=True)
-    return tokenize_lines(lines, version_info)
+    return tokenize_lines(lines, version_info, start_pos=start_pos)
 
 
-def tokenize_lines(lines, version_info):
+def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     """
     A heavily modified Python standard library tokenizer.

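Both entry points now take start_pos, so a caller can tokenize a fragment while reporting positions relative to an enclosing source. A sketch against the internal tokenizer, assuming the parse_version_string helper from parso.utils:

from parso.python.tokenize import tokenize
from parso.utils import parse_version_string

version_info = parse_version_string('3.6')
# Tokens are positioned as if the code started at line 10, column 0.
for token in tokenize('x = 1\n', version_info, start_pos=(10, 0)):
    print(token.type, repr(token.string), token.start_pos)
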
@@ -252,14 +252,22 @@ def tokenize_lines(lines, version_info):
     prefix = ''  # Should never be required, but here for safety
     additional_prefix = ''
     first = True
-    for lnum, line in enumerate(lines, 1):  # loop over lines in stream
+    lnum = start_pos[0] - 1
+    for line in lines:  # loop over lines in stream
+        lnum += 1
+        pos, max = 0, len(line)
         if first:
             if line.startswith(BOM_UTF8_STRING):
                 additional_prefix = BOM_UTF8_STRING
                 line = line[1:]
+
+            # Fake that the part before was already parsed.
+            line = '^' * start_pos[1] + line
+            pos = start_pos[1]
+            max += start_pos[1]
+
             first = False
 
-        pos, max = 0, len(line)
         if contstr:  # continued string
             endmatch = endprog.match(line)
             if endmatch:

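The '^' padding fakes a column offset on the first line only: scanning starts at pos = start_pos[1], so the carets are never tokenized, but length-based column arithmetic still comes out right. The invariant, illustrated:

# Hypothetical fragment that begins at column 4 of some enclosing line.
start_col = 4
line = 'x = 1\n'
padded = '^' * start_col + line  # '^^^^x = 1\n'
pos = start_col                  # scanning starts after the fake prefix
# The first real character is padded[pos] == 'x', reported at column 4,
# exactly where the fragment sits in the original source.
print(padded[pos], pos)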