Remove the old f-string grammar and fix the tests with the new syntax

Dave Halter
2018-04-07 02:11:26 +02:00
parent 9941348ec6
commit 8f1a436ba1
5 changed files with 66 additions and 293 deletions
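With the separate f-string grammar gone, f-strings go through the regular Python grammar and tokenizer. A minimal sketch of the resulting usage, based on the updated tests below (the `fstring` node ends up as the module's first child):

    import parso

    grammar = parso.load_grammar(version='3.6')
    module = grammar.parse('f"""{1!a:1}"""', error_recovery=False)
    fstring = module.children[0]
    assert fstring.type == 'fstring'
    assert fstring.get_code() == 'f"""{1!a:1}"""'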

parso/grammar.py

@@ -12,7 +12,6 @@ from parso.parser import BaseParser
 from parso.python.parser import Parser as PythonParser
 from parso.python.errors import ErrorFinderConfig
 from parso.python import pep8
-from parso.python import fstring

 _loaded_grammars = {}
@@ -186,7 +185,6 @@ class Grammar(object):
         normalizer.walk(node)
         return normalizer.issues

     def __repr__(self):
         labels = self._pgen_grammar.number2symbol.values()
         txt = ' '.join(list(labels)[:3]) + ' ...'
@@ -215,34 +213,6 @@ class PythonGrammar(Grammar):
         return tokenize(code, self.version_info)


-class PythonFStringGrammar(Grammar):
-    _token_namespace = fstring.TokenNamespace
-    _start_symbol = 'fstring'
-
-    def __init__(self):
-        super(PythonFStringGrammar, self).__init__(
-            text=fstring.GRAMMAR,
-            tokenizer=fstring.tokenize,
-            parser=fstring.Parser
-        )
-
-    def parse(self, code, **kwargs):
-        return self._parse(code, **kwargs)
-
-    def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
-        tokens = self._tokenizer(code, start_pos=start_pos)
-        p = self._parser(
-            self._pgen_grammar,
-            error_recovery=error_recovery,
-            start_symbol=self._start_symbol,
-        )
-        return p.parse(tokens=tokens)
-
-    def parse_leaf(self, leaf, error_recovery=True):
-        code = leaf._get_payload()
-        return self.parse(code, error_recovery=True, start_pos=leaf.start_pos)


 def load_grammar(**kwargs):
     """
     Loads a :py:class:`parso.Grammar`. The default version is the current Python
@@ -273,10 +243,6 @@ def load_grammar(**kwargs):
         except FileNotFoundError:
             message = "Python version %s is currently not supported." % version
             raise NotImplementedError(message)
-    elif language == 'python-f-string':
-        if version is not None:
-            raise NotImplementedError("Currently different versions are not supported.")
-        return PythonFStringGrammar()
     else:
         raise NotImplementedError("No support for language %s." % language)

parso/pgen2/parse.py

@@ -124,7 +124,9 @@ class PgenParser(object):
         self.error_recovery = error_recovery

     def parse(self, tokens):
-        for type_, value, start_pos, prefix in tokens:
+        for tok in tokens:
+            print(tok)
+            type_, value, start_pos, prefix = tok
             if self.add_token(type_, value, start_pos, prefix):
                 break
         else:
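The loop now binds the whole token before unpacking it. Each token is a 4-tuple of (type, value, start_pos, prefix), as the unpacking assumes; a quick way to see the shape (sketch using the public tokenizer on a made-up snippet):

    from parso.python.tokenize import tokenize

    for type_, value, start_pos, prefix in tokenize('f"{x}"', version_info=(3, 6)):
        print(type_, repr(value), start_pos, repr(prefix))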

parso/python/fstring.py

@@ -1,211 +0,0 @@
-import re
-from itertools import count
-
-from parso.utils import PythonVersionInfo
-from parso.utils import split_lines
-from parso.python.tokenize import Token
-from parso import parser
-from parso.tree import TypedLeaf, ErrorNode, ErrorLeaf
-
-version36 = PythonVersionInfo(3, 6)
-
-
-class TokenNamespace:
-    _c = count()
-    LBRACE = next(_c)
-    RBRACE = next(_c)
-    ENDMARKER = next(_c)
-    COLON = next(_c)
-    CONVERSION = next(_c)
-    PYTHON_EXPR = next(_c)
-    EXCLAMATION_MARK = next(_c)
-    UNTERMINATED_STRING = next(_c)
-
-    token_map = dict((v, k) for k, v in locals().items() if not k.startswith('_'))
-
-    @classmethod
-    def generate_token_id(cls, string):
-        if string == '{':
-            return cls.LBRACE
-        elif string == '}':
-            return cls.RBRACE
-        elif string == '!':
-            return cls.EXCLAMATION_MARK
-        elif string == ':':
-            return cls.COLON
-        return getattr(cls, string)
-
-
-GRAMMAR = """
-fstring: expression* ENDMARKER
-format_spec: ':' expression*
-expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ format_spec ] '}'
-"""
-
-_prefix = r'((?:[^{}]+)*)'
-_expr = _prefix + r'(\{|\}|$)'
-_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
-# There's only one conversion character allowed. But the rules have to be
-# checked later anyway, so allow more here. This makes error recovery nicer.
-_conversion = r'([^={}:]*)(.?)'
-
-_compiled_expr = re.compile(_expr)
-_compiled_in_expr = re.compile(_in_expr)
-_compiled_conversion = re.compile(_conversion)
-
-
-def tokenize(code, start_pos=(1, 0)):
-    def add_to_pos(string):
-        lines = split_lines(string)
-        l = len(lines[-1])
-        if len(lines) > 1:
-            start_pos[0] += len(lines) - 1
-            start_pos[1] = l
-        else:
-            start_pos[1] += l
-
-    def tok(value, type=None, prefix=''):
-        if type is None:
-            type = TokenNamespace.generate_token_id(value)
-
-        add_to_pos(prefix)
-        token = Token(type, value, tuple(start_pos), prefix)
-        add_to_pos(value)
-        return token
-
-    start = 0
-    recursion_level = 0
-    added_prefix = ''
-    start_pos = list(start_pos)
-    while True:
-        match = _compiled_expr.match(code, start)
-        prefix = added_prefix + match.group(1)
-        found = match.group(2)
-        start = match.end()
-        if not found:
-            # We're at the end.
-            break
-
-        if found == '}':
-            if recursion_level == 0 and len(code) > start and code[start] == '}':
-                # This is a }} escape.
-                added_prefix = prefix + '}}'
-                start += 1
-                continue
-
-            recursion_level = max(0, recursion_level - 1)
-            yield tok(found, prefix=prefix)
-            added_prefix = ''
-        else:
-            assert found == '{'
-            if recursion_level == 0 and len(code) > start and code[start] == '{':
-                # This is a {{ escape.
-                added_prefix = prefix + '{{'
-                start += 1
-                continue
-
-            recursion_level += 1
-            yield tok(found, prefix=prefix)
-            added_prefix = ''
-
-            expression = ''
-            squared_count = 0
-            curly_count = 0
-            while True:
-                expr_match = _compiled_in_expr.match(code, start)
-                expression += expr_match.group(1)
-                found = expr_match.group(2)
-                start = expr_match.end()
-
-                if found == '{':
-                    curly_count += 1
-                    expression += found
-                elif found == '}' and curly_count > 0:
-                    curly_count -= 1
-                    expression += found
-                elif found == '[':
-                    squared_count += 1
-                    expression += found
-                elif found == ']':
-                    # Use a max function here, because the Python code might
-                    # just have syntax errors.
-                    squared_count = max(0, squared_count - 1)
-                    expression += found
-                elif found == ':' and (squared_count or curly_count):
-                    expression += found
-                elif found in ('"', "'"):
-                    search = found
-                    if len(code) > start + 1 and \
-                            code[start] == found == code[start+1]:
-                        search *= 3
-                        start += 2
-
-                    index = code.find(search, start)
-                    if index == -1:
-                        yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
-                        yield tok(
-                            found + code[start:],
-                            type=TokenNamespace.UNTERMINATED_STRING,
-                        )
-                        start = len(code)
-                        break
-                    expression += found + code[start:index+1]
-                    start = index + 1
-                elif found == '!' and len(code) > start and code[start] == '=':
-                    # This is a python `!=` and not a conversion.
-                    expression += found
-                else:
-                    yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
-                    if found:
-                        yield tok(found)
-                    break
-
-            if found == '!':
-                conversion_match = _compiled_conversion.match(code, start)
-                found = conversion_match.group(2)
-                start = conversion_match.end()
-                yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
-                if found:
-                    yield tok(found)
-                if found == '}':
-                    recursion_level -= 1
-
-            # We don't need to handle everything after ':', because that is
-            # basically new tokens.
-
-    yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
-
-
-class Parser(parser.BaseParser):
-    def parse(self, tokens):
-        node = super(Parser, self).parse(tokens)
-        if isinstance(node, self.default_leaf):  # Is an endmarker.
-            # If there's no curly braces we get back a non-module. We always
-            # want an fstring.
-            node = self.default_node('fstring', [node])
-        return node
-
-    def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
-        # TODO this is so ugly.
-        leaf_type = TokenNamespace.token_map[type].lower()
-        return TypedLeaf(leaf_type, value, start_pos, prefix)
-
-    def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
-                       add_token_callback):
-        if not self._error_recovery:
-            return super(Parser, self).error_recovery(
-                pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
-                add_token_callback
-            )
-
-        token_type = TokenNamespace.token_map[typ].lower()
-        if len(stack) == 1:
-            error_leaf = ErrorLeaf(token_type, value, start_pos, prefix)
-            stack[0][2][1].append(error_leaf)
-        else:
-            dfa, state, (type_, nodes) = stack[1]
-            stack[0][2][1].append(ErrorNode(nodes))
-            stack[1:] = []
-            add_token_callback(typ, value, start_pos, prefix)
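For reference, the removed grammar split f-string contents on its own; e.g. `'{x!r:{y}}'` derived as (illustration only, the module no longer exists):

    # expression: '{'  PYTHON_EXPR('x')  '!'  CONVERSION('r')
    #             format_spec(':'  expression('{' PYTHON_EXPR('y') '}'))  '}'

All of this bracket, quote, and conversion bookkeeping now lives in the FSTRING_STRING handling of parso/python/tokenize.py below.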

parso/python/tokenize.py

@@ -106,8 +106,8 @@ def _get_token_collection(version_info):
     return result


-fstring_string_single_line = _compile(r'(?:[^{\r\n]+|\{\{)+')
-fstring_string_multi_line = _compile(r'(?:[^{]+|\{\{)+')
+fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
+fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')


 def _create_token_collection(version_info):
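The widened character class means a lone `}` now ends the plain-string run, so `}}` can be handled as an escape and a stray `}` surfaces instead of being swallowed. A standalone sketch with plain `re`:

    import re

    old = re.compile(r'(?:[^{\r\n]+|\{\{)+')
    new = re.compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
    print(old.match('a}b').group(0))   # 'a}b'  -- old pattern ran past the '}'
    print(new.match('a}b').group(0))   # 'a'    -- now stops at a single '}'
    print(new.match('a}}b').group(0))  # 'a}}b' -- '}}' escapes still consumed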
@@ -253,7 +253,10 @@ class FStringNode(object):
         self.quote = quote
         self.parentheses_count = 0
         self.previous_lines = ''
-        self.in_format_spec = False
+        self.last_string_start_pos = None
+        # In the syntax there can be multiple format_spec's nested:
+        # {x:{y:3}}
+        self.format_spec_count = 0

     def open_parentheses(self, character):
         self.parentheses_count += 1
@@ -265,7 +268,7 @@
         return len(self.quote) == 3

     def is_in_expr(self):
-        return self.parentheses_count
+        return (self.parentheses_count - self.format_spec_count) > 0


 def _check_fstring_ending(fstring_stack, token, from_start=False):
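`format_spec_count` makes `is_in_expr()` report False inside a format spec but True again inside a nested expression. A worked trace for f"{x:{y:3}}" (sketch; counts as maintained while scanning left to right):

    # '{'  parentheses_count=1, format_spec_count=0 -> is_in_expr() True   ('x' is an expression)
    # ':'  at parentheses_count == 1                -> format_spec_count=1, is_in_expr() False (spec text)
    # '{'  parentheses_count=2                      -> is_in_expr() True   ('y' is an expression)
    # (the inner ':' sits at parentheses_count == 2, so it stays an ordinary OP token)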
@@ -290,7 +293,7 @@ def _check_fstring_ending(fstring_stack, token, from_start=False):
     return fstring_index, fstring_end


-def _find_fstring_string(fstring_stack, line, pos):
+def _find_fstring_string(fstring_stack, line, lnum, pos):
     tos = fstring_stack[-1]
     if tos.is_in_expr():
         return '', pos
@@ -302,8 +305,12 @@ def _find_fstring_string(fstring_stack, line, pos):
     else:
         match = fstring_string_single_line.match(line, pos)
     if match is None:
-        string = fstring_stack[-1].previous_lines
+        string = tos.previous_lines
     else:
+        print(match, lnum, pos, repr(tos.previous_lines))
+        if not tos.previous_lines:
+            tos.last_string_start_pos = (lnum, pos)
         string = match.group(0)
         for fstring_stack_node in fstring_stack:
             try:
@@ -313,12 +320,11 @@ def _find_fstring_string(fstring_stack, line, pos):
     new_pos += len(string)
     if allow_multiline and string.endswith('\n'):
-        fstring_stack[-1].previous_lines += string
+        tos.previous_lines += string
         string = ''
     else:
-        string = fstring_stack[-1].previous_lines + string
-        fstring_stack[-1].previous_lines = ''
+        string = tos.previous_lines + string

     return string, new_pos
@@ -385,12 +391,18 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
         while pos < max:
             if fstring_stack:
-                string, pos = _find_fstring_string(fstring_stack, line, pos)
+                string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
                 if string:
-                    yield PythonToken(FSTRING_STRING, string, (lnum, pos), '')
+                    yield PythonToken(
+                        FSTRING_STRING, string,
+                        fstring_stack[-1].last_string_start_pos, ''
+                    )
+                    fstring_stack[-1].previous_lines = ''
                     continue
-                if pos < max:
+                if pos == max:
+                    break

                 rest = line[pos:]
                 fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True)
@@ -531,7 +543,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                     paren_level -= 1
             elif token == ':' and fstring_stack \
                     and fstring_stack[-1].parentheses_count == 1:
-                fstring_stack[-1].in_format_spec = True
+                fstring_stack[-1].format_spec_count += 1

             try:
                 # This check is needed in any case to check if it's a valid
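With `last_string_start_pos`, an FSTRING_STRING token that spans lines is now reported where its text starts instead of where the final match ended. A quick inspection sketch (tokenizer signature as used in the updated tests):

    from parso.python.tokenize import tokenize

    code = 'f"""\n {\nfoo\n }"""'
    for tok in tokenize(code, version_info=(3, 6)):
        print(tok.start_pos, tok)

The expected positions for exactly this input are encoded in `test_tokenize_start_pos` below.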

test/test_fstring.py

@@ -1,17 +1,18 @@
 import pytest

 from parso import load_grammar, ParserSyntaxError
-from parso.python.fstring import tokenize
+from parso.python.tokenize import tokenize


 @pytest.fixture
 def grammar():
-    return load_grammar(language="python-f-string")
+    return load_grammar(version='3.6')


 @pytest.mark.parametrize(
     'code', [
         '{1}',
+        '{1:}',
         '',
         '{1!a}',
         '{1!a:1}',
@@ -26,22 +27,12 @@ def grammar():
         '{{{1}',
         '1{{2{{3',
         '}}',
-        '{:}}}',
-        # Invalid, but will be checked, later.
-        '{}',
-        '{1:}',
-        '{:}',
-        '{:1}',
-        '{!:}',
-        '{!}',
-        '{!a}',
-        '{1:{}}',
-        '{1:{:}}',
     ]
 )
 def test_valid(code, grammar):
-    fstring = grammar.parse(code, error_recovery=False)
+    code = 'f"""%s"""' % code
+    module = grammar.parse(code, error_recovery=False)
+    fstring = module.children[0]
     assert fstring.type == 'fstring'
     assert fstring.get_code() == code
@@ -52,24 +43,37 @@ def test_valid(code, grammar):
         '{',
         '{1!{a}}',
         '{!{a}}',
+        '{}',
+        '{:}',
+        '{:}}}',
+        '{:1}',
+        '{!:}',
+        '{!}',
+        '{!a}',
+        '{1:{}}',
+        '{1:{:}}',
     ]
 )
 def test_invalid(code, grammar):
+    code = 'f"""%s"""' % code
     with pytest.raises(ParserSyntaxError):
         grammar.parse(code, error_recovery=False)

     # It should work with error recovery.
-    #grammar.parse(code, error_recovery=True)
+    grammar.parse(code, error_recovery=True)


 @pytest.mark.parametrize(
-    ('code', 'start_pos', 'positions'), [
+    ('code', 'positions'), [
         # 2 times 2, 5 because python expr and endmarker.
-        ('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
-        (' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
-        ('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
+        ('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]),
+        ('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9),
+                           (1, 10), (1, 11), (1, 12), (1, 13)]),
+        ('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1),
+                                  (4, 2), (4, 5)]),
     ]
 )
-def test_tokenize_start_pos(code, start_pos, positions):
-    tokens = tokenize(code, start_pos)
+def test_tokenize_start_pos(code, positions):
+    tokens = list(tokenize(code, version_info=(3, 6)))
+    print(tokens)
     assert positions == [p.start_pos for p in tokens]
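A note on running the updated suite (path per the parso repo layout, assumed):

    pytest test/test_fstring.py -s

(`-s` keeps the `print()` output from the tests and the tokenizer visible.)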