diff --git a/parso/grammar.py b/parso/grammar.py index e5abf81..c825b55 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -12,7 +12,6 @@ from parso.parser import BaseParser from parso.python.parser import Parser as PythonParser from parso.python.errors import ErrorFinderConfig from parso.python import pep8 -from parso.python import fstring _loaded_grammars = {} @@ -186,7 +185,6 @@ class Grammar(object): normalizer.walk(node) return normalizer.issues - def __repr__(self): labels = self._pgen_grammar.number2symbol.values() txt = ' '.join(list(labels)[:3]) + ' ...' @@ -215,34 +213,6 @@ class PythonGrammar(Grammar): return tokenize(code, self.version_info) -class PythonFStringGrammar(Grammar): - _token_namespace = fstring.TokenNamespace - _start_symbol = 'fstring' - - def __init__(self): - super(PythonFStringGrammar, self).__init__( - text=fstring.GRAMMAR, - tokenizer=fstring.tokenize, - parser=fstring.Parser - ) - - def parse(self, code, **kwargs): - return self._parse(code, **kwargs) - - def _parse(self, code, error_recovery=True, start_pos=(1, 0)): - tokens = self._tokenizer(code, start_pos=start_pos) - p = self._parser( - self._pgen_grammar, - error_recovery=error_recovery, - start_symbol=self._start_symbol, - ) - return p.parse(tokens=tokens) - - def parse_leaf(self, leaf, error_recovery=True): - code = leaf._get_payload() - return self.parse(code, error_recovery=True, start_pos=leaf.start_pos) - - def load_grammar(**kwargs): """ Loads a :py:class:`parso.Grammar`. The default version is the current Python @@ -273,10 +243,6 @@ def load_grammar(**kwargs): except FileNotFoundError: message = "Python version %s is currently not supported." % version raise NotImplementedError(message) - elif language == 'python-f-string': - if version is not None: - raise NotImplementedError("Currently different versions are not supported.") - return PythonFStringGrammar() else: raise NotImplementedError("No support for language %s." % language) diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index aaacfce..e2369d1 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -124,7 +124,9 @@ class PgenParser(object): self.error_recovery = error_recovery def parse(self, tokens): - for type_, value, start_pos, prefix in tokens: + for tok in tokens: + print(tok) + type_, value, start_pos, prefix = tok if self.add_token(type_, value, start_pos, prefix): break else: diff --git a/parso/python/fstring.py b/parso/python/fstring.py index a8fe7b4..e69de29 100644 --- a/parso/python/fstring.py +++ b/parso/python/fstring.py @@ -1,211 +0,0 @@ -import re - -from itertools import count -from parso.utils import PythonVersionInfo -from parso.utils import split_lines -from parso.python.tokenize import Token -from parso import parser -from parso.tree import TypedLeaf, ErrorNode, ErrorLeaf - -version36 = PythonVersionInfo(3, 6) - - -class TokenNamespace: - _c = count() - LBRACE = next(_c) - RBRACE = next(_c) - ENDMARKER = next(_c) - COLON = next(_c) - CONVERSION = next(_c) - PYTHON_EXPR = next(_c) - EXCLAMATION_MARK = next(_c) - UNTERMINATED_STRING = next(_c) - - token_map = dict((v, k) for k, v in locals().items() if not k.startswith('_')) - - @classmethod - def generate_token_id(cls, string): - if string == '{': - return cls.LBRACE - elif string == '}': - return cls.RBRACE - elif string == '!': - return cls.EXCLAMATION_MARK - elif string == ':': - return cls.COLON - return getattr(cls, string) - - -GRAMMAR = """ -fstring: expression* ENDMARKER -format_spec: ':' expression* -expression: '{' PYTHON_EXPR [ '!' 
CONVERSION ] [ format_spec ] '}' -""" - -_prefix = r'((?:[^{}]+)*)' -_expr = _prefix + r'(\{|\}|$)' -_in_expr = r'([^{}\[\]:"\'!]*)(.?)' -# There's only one conversion character allowed. But the rules have to be -# checked later anyway, so allow more here. This makes error recovery nicer. -_conversion = r'([^={}:]*)(.?)' - -_compiled_expr = re.compile(_expr) -_compiled_in_expr = re.compile(_in_expr) -_compiled_conversion = re.compile(_conversion) - - -def tokenize(code, start_pos=(1, 0)): - def add_to_pos(string): - lines = split_lines(string) - l = len(lines[-1]) - if len(lines) > 1: - start_pos[0] += len(lines) - 1 - start_pos[1] = l - else: - start_pos[1] += l - - def tok(value, type=None, prefix=''): - if type is None: - type = TokenNamespace.generate_token_id(value) - - add_to_pos(prefix) - token = Token(type, value, tuple(start_pos), prefix) - add_to_pos(value) - return token - - start = 0 - recursion_level = 0 - added_prefix = '' - start_pos = list(start_pos) - while True: - match = _compiled_expr.match(code, start) - prefix = added_prefix + match.group(1) - found = match.group(2) - start = match.end() - if not found: - # We're at the end. - break - - if found == '}': - if recursion_level == 0 and len(code) > start and code[start] == '}': - # This is a }} escape. - added_prefix = prefix + '}}' - start += 1 - continue - - recursion_level = max(0, recursion_level - 1) - yield tok(found, prefix=prefix) - added_prefix = '' - else: - assert found == '{' - if recursion_level == 0 and len(code) > start and code[start] == '{': - # This is a {{ escape. - added_prefix = prefix + '{{' - start += 1 - continue - - recursion_level += 1 - yield tok(found, prefix=prefix) - added_prefix = '' - - expression = '' - squared_count = 0 - curly_count = 0 - while True: - expr_match = _compiled_in_expr.match(code, start) - expression += expr_match.group(1) - found = expr_match.group(2) - start = expr_match.end() - - if found == '{': - curly_count += 1 - expression += found - elif found == '}' and curly_count > 0: - curly_count -= 1 - expression += found - elif found == '[': - squared_count += 1 - expression += found - elif found == ']': - # Use a max function here, because the Python code might - # just have syntax errors. - squared_count = max(0, squared_count - 1) - expression += found - elif found == ':' and (squared_count or curly_count): - expression += found - elif found in ('"', "'"): - search = found - if len(code) > start + 1 and \ - code[start] == found == code[start+1]: - search *= 3 - start += 2 - - index = code.find(search, start) - if index == -1: - yield tok(expression, type=TokenNamespace.PYTHON_EXPR) - yield tok( - found + code[start:], - type=TokenNamespace.UNTERMINATED_STRING, - ) - start = len(code) - break - expression += found + code[start:index+1] - start = index + 1 - elif found == '!' and len(code) > start and code[start] == '=': - # This is a python `!=` and not a conversion. - expression += found - else: - yield tok(expression, type=TokenNamespace.PYTHON_EXPR) - if found: - yield tok(found) - break - - if found == '!': - conversion_match = _compiled_conversion.match(code, start) - found = conversion_match.group(2) - start = conversion_match.end() - yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION) - if found: - yield tok(found) - if found == '}': - recursion_level -= 1 - - # We don't need to handle everything after ':', because that is - # basically new tokens. 
- - yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix) - - -class Parser(parser.BaseParser): - def parse(self, tokens): - node = super(Parser, self).parse(tokens) - if isinstance(node, self.default_leaf): # Is an endmarker. - # If there's no curly braces we get back a non-module. We always - # want an fstring. - node = self.default_node('fstring', [node]) - - return node - - def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos): - # TODO this is so ugly. - leaf_type = TokenNamespace.token_map[type].lower() - return TypedLeaf(leaf_type, value, start_pos, prefix) - - def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix, - add_token_callback): - if not self._error_recovery: - return super(Parser, self).error_recovery( - pgen_grammar, stack, arcs, typ, value, start_pos, prefix, - add_token_callback - ) - - token_type = TokenNamespace.token_map[typ].lower() - if len(stack) == 1: - error_leaf = ErrorLeaf(token_type, value, start_pos, prefix) - stack[0][2][1].append(error_leaf) - else: - dfa, state, (type_, nodes) = stack[1] - stack[0][2][1].append(ErrorNode(nodes)) - stack[1:] = [] - - add_token_callback(typ, value, start_pos, prefix) diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 6c55c9a..7d72fc6 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -106,8 +106,8 @@ def _get_token_collection(version_info): return result -fstring_string_single_line = _compile(r'(?:[^{\r\n]+|\{\{)+') -fstring_string_multi_line = _compile(r'(?:[^{]+|\{\{)+') +fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+') +fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+') def _create_token_collection(version_info): @@ -253,7 +253,10 @@ class FStringNode(object): self.quote = quote self.parentheses_count = 0 self.previous_lines = '' - self.in_format_spec = False + self.last_string_start_pos = None + # In the syntax there can be multiple format_spec's nested: + # {x:{y:3}} + self.format_spec_count = 0 def open_parentheses(self, character): self.parentheses_count += 1 @@ -265,7 +268,7 @@ class FStringNode(object): return len(self.quote) == 3 def is_in_expr(self): - return self.parentheses_count + return (self.parentheses_count - self.format_spec_count) > 0 def _check_fstring_ending(fstring_stack, token, from_start=False): @@ -290,7 +293,7 @@ def _check_fstring_ending(fstring_stack, token, from_start=False): return fstring_index, fstring_end -def _find_fstring_string(fstring_stack, line, pos): +def _find_fstring_string(fstring_stack, line, lnum, pos): tos = fstring_stack[-1] if tos.is_in_expr(): return '', pos @@ -302,8 +305,12 @@ def _find_fstring_string(fstring_stack, line, pos): else: match = fstring_string_single_line.match(line, pos) if match is None: - string = fstring_stack[-1].previous_lines + string = tos.previous_lines else: + print(match, lnum, pos, repr(tos.previous_lines)) + if not tos.previous_lines: + tos.last_string_start_pos = (lnum, pos) + string = match.group(0) for fstring_stack_node in fstring_stack: try: @@ -313,12 +320,11 @@ def _find_fstring_string(fstring_stack, line, pos): new_pos += len(string) if allow_multiline and string.endswith('\n'): - fstring_stack[-1].previous_lines += string + tos.previous_lines += string string = '' else: - string = fstring_stack[-1].previous_lines + string + string = tos.previous_lines + string - fstring_stack[-1].previous_lines = '' return string, new_pos @@ -385,25 +391,31 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): while pos < max: 
if fstring_stack: - string, pos = _find_fstring_string(fstring_stack, line, pos) + string, pos = _find_fstring_string(fstring_stack, line, lnum, pos) if string: - yield PythonToken(FSTRING_STRING, string, (lnum, pos), '') + yield PythonToken( + FSTRING_STRING, string, + fstring_stack[-1].last_string_start_pos, '' + ) + fstring_stack[-1].previous_lines = '' continue - if pos < max: - rest = line[pos:] - fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True) + if pos == max: + break - if fstring_index is not None: - yield PythonToken( - FSTRING_END, - fstring_stack[fstring_index].quote, - (lnum, pos), - prefix='' - ) - del fstring_stack[fstring_index:] - pos += end - continue + rest = line[pos:] + fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True) + + if fstring_index is not None: + yield PythonToken( + FSTRING_END, + fstring_stack[fstring_index].quote, + (lnum, pos), + prefix='' + ) + del fstring_stack[fstring_index:] + pos += end + continue pseudomatch = pseudo_token.match(line, pos) if not pseudomatch: # scan for tokens @@ -531,7 +543,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): paren_level -= 1 elif token == ':' and fstring_stack \ and fstring_stack[-1].parentheses_count == 1: - fstring_stack[-1].in_format_spec = True + fstring_stack[-1].format_spec_count += 1 try: # This check is needed in any case to check if it's a valid diff --git a/test/test_fstring.py b/test/test_fstring.py index 936d7fb..59debd9 100644 --- a/test/test_fstring.py +++ b/test/test_fstring.py @@ -1,17 +1,18 @@ import pytest from parso import load_grammar, ParserSyntaxError -from parso.python.fstring import tokenize +from parso.python.tokenize import tokenize @pytest.fixture def grammar(): - return load_grammar(language="python-f-string") + return load_grammar(version='3.6') @pytest.mark.parametrize( 'code', [ '{1}', + '{1:}', '', '{1!a}', '{1!a:1}', @@ -26,22 +27,12 @@ def grammar(): '{{{1}', '1{{2{{3', '}}', - '{:}}}', - - # Invalid, but will be checked, later. - '{}', - '{1:}', - '{:}', - '{:1}', - '{!:}', - '{!}', - '{!a}', - '{1:{}}', - '{1:{:}}', ] ) def test_valid(code, grammar): - fstring = grammar.parse(code, error_recovery=False) + code = 'f"""%s"""' % code + module = grammar.parse(code, error_recovery=False) + fstring = module.children[0] assert fstring.type == 'fstring' assert fstring.get_code() == code @@ -52,24 +43,37 @@ def test_valid(code, grammar): '{', '{1!{a}}', '{!{a}}', + '{}', + '{:}', + '{:}}}', + '{:1}', + '{!:}', + '{!}', + '{!a}', + '{1:{}}', + '{1:{:}}', ] ) def test_invalid(code, grammar): + code = 'f"""%s"""' % code with pytest.raises(ParserSyntaxError): grammar.parse(code, error_recovery=False) # It should work with error recovery. - #grammar.parse(code, error_recovery=True) + grammar.parse(code, error_recovery=True) @pytest.mark.parametrize( - ('code', 'start_pos', 'positions'), [ + ('code', 'positions'), [ # 2 times 2, 5 because python expr and endmarker. 
- ('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]), - (' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]), - ('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]), + ('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]), + ('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9), + (1, 10), (1, 11), (1, 12), (1, 13)]), + ('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1), + (4, 2), (4, 5)]), ] ) -def test_tokenize_start_pos(code, start_pos, positions): - tokens = tokenize(code, start_pos) +def test_tokenize_start_pos(code, positions): + tokens = list(tokenize(code, version_info=(3, 6))) + print(tokens) assert positions == [p.start_pos for p in tokens]
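
The following usage sketch (not part of the patch) summarizes the behaviour the updated tests rely on: the separate `python-f-string` grammar and `parso/python/fstring.py` are removed, f-strings are parsed through the normal Python 3.6 grammar, and `parso.python.tokenize.tokenize` now emits the f-string tokens itself. The literal values and the `module.children[0].type == 'fstring'` tree shape are taken from `test_fstring.py` above; everything else is illustrative.

    from parso import load_grammar
    from parso.python.tokenize import tokenize

    # f-strings now go through the regular Python grammar
    # (one of the "valid" cases from test_fstring.py):
    grammar = load_grammar(version='3.6')
    code = 'f"""{1!a:1}"""'
    module = grammar.parse(code, error_recovery=False)
    fstring = module.children[0]
    assert fstring.type == 'fstring'
    assert fstring.get_code() == code

    # ...and the Python tokenizer produces the f-string tokens directly,
    # with the start positions checked in test_tokenize_start_pos:
    for token in tokenize('f"""\n {\nfoo\n }"""', version_info=(3, 6)):
        print(token.start_pos, token)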