Cleanup a lot of details in the tokenizer for fstrings

Dave Halter
2018-03-31 14:25:29 +02:00
parent 235fda3fbb
commit b1aa7c6a79
2 changed files with 44 additions and 24 deletions

View File

@@ -150,8 +150,8 @@ yield_expr: 'yield' [yield_arg]
 yield_arg: 'from' test | testlist
 strings: (STRING | fstring)+
-fstring: FSTRING_START fstring_content FSTRING_END
-fstring_content: (FSTRING_STRING | fstring_expr)*
+fstring: FSTRING_START fstring_content* FSTRING_END
+fstring_content: (FSTRING_STRING | fstring_expr)
 fstring_conversion: '!' NAME
 fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
-fstring_format_spec: ':' fstring_content
+fstring_format_spec: ':' fstring_content*

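Note on the grammar change: moving the star means a whole f-string may now contain zero content parts, while each fstring_content node is exactly one FSTRING_STRING or fstring_expr. A rough illustration via the public parso entry point (the version argument and the exact tree nesting are assumptions, not part of this diff):

import parso

# f'' is FSTRING_START immediately followed by FSTRING_END, which is only
# covered because the rule is now: fstring: FSTRING_START fstring_content* FSTRING_END
empty = parso.parse("f''", version='3.6')

# An empty format spec, f'{x:}', is likewise allowed by
# fstring_format_spec: ':' fstring_content*
spec = parso.parse("f'{x:}'", version='3.6')

for tree in (empty, spec):
    # The exact nesting may differ between parso versions; just show node types.
    print([child.type for child in tree.children])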
View File

@@ -69,7 +69,7 @@ def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False
     if version_info >= (3, 0):
         valid_string_prefixes.append('br')
-    result = {''}
+    result = set([''])
     if version_info >= (3, 6) and include_fstring:
         f = ['f', 'fr']
         if only_fstring:
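For context, a stand-alone sketch of what _all_string_prefixes builds. The base prefix list, the only_fstring branch, and the case/permutation expansion are assumptions about the unchanged parts of the function (they mirror CPython's tokenizer), not something shown in this hunk:

import itertools

def all_string_prefixes(version_info, include_fstring=False, only_fstring=False):
    valid_string_prefixes = ['b', 'r', 'u']
    if version_info >= (3, 0):
        valid_string_prefixes.append('br')

    result = set([''])
    if version_info >= (3, 6) and include_fstring:
        f = ['f', 'fr']
        if only_fstring:
            valid_string_prefixes = f
            result = set()
        else:
            valid_string_prefixes += f

    for prefix in valid_string_prefixes:
        # Every ordering and casing of a prefix is legal ('Rb', 'bR', ...).
        for permutation in itertools.permutations(prefix):
            for casing in itertools.product(*[(c, c.upper()) for c in permutation]):
                result.add(''.join(casing))
    return result

print(sorted(all_string_prefixes((3, 6), include_fstring=True)))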
@@ -260,19 +260,25 @@ class FStringNode(object):
     def close_parentheses(self, character):
         self.parentheses_count -= 1
-        return self.parentheses_count == 0
 
     def allow_multiline(self):
-        return len(self.quote == 3)
+        return len(self.quote) == 3
 
     def is_in_expr(self):
         return self.parentheses_count and not self.in_format_spec
 
 
-def _check_fstring_ending(fstring_stack, token):
+def _check_fstring_ending(fstring_stack, token, from_start=False):
     fstring_end = float('inf')
     fstring_index = None
     for i, node in enumerate(fstring_stack):
-        try:
-            end = token.index(node.quote)
-        except ValueError:
+        if from_start:
+            if token.startswith(node.quote):
+                fstring_index = i
+                fstring_end = len(node.quote)
+            else:
+                continue
+        else:
+            try:
+                end = token.index(node.quote)
+            except ValueError:
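The new from_start flag changes how a closing quote is searched for: instead of looking for a stacked quote anywhere in the token, the caller can ask whether the remaining text begins with one. A self-contained sketch under that reading (the final comparison against fstring_end is assumed from context, since the hunk is cut off above):

class Node:
    def __init__(self, quote):
        self.quote = quote

def check_fstring_ending(fstring_stack, token, from_start=False):
    fstring_end = float('inf')
    fstring_index = None
    for i, node in enumerate(fstring_stack):
        if from_start:
            # Only a quote sitting right at the start of the text counts.
            if token.startswith(node.quote):
                fstring_index = i
                fstring_end = len(node.quote)
        else:
            # Original behaviour: the quote may appear anywhere in the token.
            try:
                end = token.index(node.quote)
            except ValueError:
                continue
            if fstring_index is None or end < fstring_end:
                fstring_index = i
                fstring_end = end
    return fstring_index, fstring_end

stack = [Node('"'), Node("'''")]
print(check_fstring_ending(stack, '" + 1', from_start=True))   # (0, 1)
print(check_fstring_ending(stack, 'abc"', from_start=True))    # (None, inf)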
@@ -296,7 +302,7 @@ def _find_fstring_string(fstring_stack, line, pos):
     else:
         match = fstring_string_single_line.match(line, pos)
     if match is None:
-        string = fstring_stack.previous_lines
+        string = fstring_stack[-1].previous_lines
     else:
         string = match.group(0)
         for fstring_stack_node in fstring_stack:
@@ -307,10 +313,12 @@ def _find_fstring_string(fstring_stack, line, pos):
         new_pos += len(string)
         if allow_multiline and string.endswith('\n'):
-            fstring_stack.previous_lines += string
+            fstring_stack[-1].previous_lines += string
             string = ''
         else:
-            string = fstring_stack_node.previous_lines + string
+            string = fstring_stack[-1].previous_lines + string
+            fstring_stack[-1].previous_lines = ''
 
     return string, new_pos
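Both corrected lines touch the multiline buffer: previous_lines lives on the topmost FStringNode, not on the stack list itself, and it is now cleared here once it has been flushed instead of in tokenize_lines below. A toy version of that buffering, with the node reduced to just the attribute involved:

class Node:
    def __init__(self):
        self.previous_lines = ''

def emit_or_buffer(node, string, allow_multiline):
    # Inside a triple-quoted f-string a trailing newline means the string
    # continues on the next line, so keep collecting instead of emitting.
    if allow_multiline and string.endswith('\n'):
        node.previous_lines += string
        return ''
    # Otherwise flush whatever was buffered and reset the buffer.
    string = node.previous_lines + string
    node.previous_lines = ''
    return string

node = Node()
print(repr(emit_or_buffer(node, 'first line\n', allow_multiline=True)))  # ''
print(repr(emit_or_buffer(node, 'then {', allow_multiline=True)))        # 'first line\nthen {'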
@@ -376,14 +384,27 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             continue
 
         while pos < max:
-            assert not fstring_stack
             if fstring_stack:
                 string, pos = _find_fstring_string(fstring_stack, line, pos)
                 if string:
-                    fstring_stack.previous_lines = ''
                     yield PythonToken(FSTRING_STRING, string, (lnum, pos), '')
                     continue
+                if pos < max:
+                    rest = line[pos:]
+                    fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True)
+                    if fstring_index is not None:
+                        yield PythonToken(
+                            FSTRING_END,
+                            fstring_stack[fstring_index].quote,
+                            (lnum, pos),
+                            prefix=''
+                        )
+                        del fstring_stack[fstring_index:]
+                        pos += end
+                        continue
 
             pseudomatch = pseudo_token.match(line, pos)
             if not pseudomatch:                             # scan for tokens
                 txt = line[pos:]
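With this block, a closing quote at the start of the remaining line now produces FSTRING_END directly from the f-string branch, and every node from that quote onward is dropped from the stack. Roughly the stream this yields for a simple f-string; the import path, the plain version_info tuple, and the token attribute names are assumptions based on this diff rather than a documented API:

from parso.python.tokenize import tokenize_lines

for token in tokenize_lines(['f"hi {name}"\n'], version_info=(3, 6)):
    print(token.type, repr(token.string))

# Expected shape (exact values may differ):
#   FSTRING_START   'f"'
#   FSTRING_STRING  'hi '
#   OP              '{'
#   NAME            'name'
#   OP              '}'
#   FSTRING_END     '"'
#   NEWLINE         '\n'
#   ENDMARKER       ''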
@@ -436,7 +457,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                             (lnum, spos[1] + 1),
                             prefix=''
                         )
-                        del fstring_index[fstring_index:]
+                        del fstring_stack[fstring_index:]
                         pos -= len(token) - end
                         continue
@@ -500,13 +521,12 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             else:
                 if token in '([{':
                     if fstring_stack:
-                        fstring_stack[-1].open_bracket(token)
+                        fstring_stack[-1].open_parentheses(token)
                     else:
                         paren_level += 1
                 elif token in ')]}':
                     if fstring_stack:
-                        if fstring_stack[-1].close_parentheses(token):
-                            fstring_stack.pop()
+                        fstring_stack[-1].close_parentheses(token)
                     else:
                         paren_level -= 1
                 elif token == ':' and fstring_stack \
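Since close_parentheses no longer reports whether the expression was closed (its return statement was removed above), this branch only adjusts the counter; popping finished f-strings off the stack is handled by the FSTRING_END branches instead. A minimal sketch of the bookkeeping this relies on, with FStringNode reduced to the pieces used here (open_parentheses is assumed to be the symmetric increment):

class FStringNode:
    def __init__(self, quote):
        self.quote = quote
        self.parentheses_count = 0
        self.in_format_spec = False

    def open_parentheses(self, character):
        self.parentheses_count += 1

    def close_parentheses(self, character):
        self.parentheses_count -= 1

    def is_in_expr(self):
        return self.parentheses_count and not self.in_format_spec

fstring_stack = [FStringNode('"')]
paren_level = 0

# Feed the bracket tokens of f"{d[k]}" through the dispatch above.
for token in ['{', 'd', '[', 'k', ']', '}']:
    if token in '([{':
        if fstring_stack:
            fstring_stack[-1].open_parentheses(token)
        else:
            paren_level += 1
    elif token in ')]}':
        if fstring_stack:
            fstring_stack[-1].close_parentheses(token)
        else:
            paren_level -= 1

print(fstring_stack[-1].parentheses_count)  # 0, the expression is closed again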