mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-06 21:04:29 +08:00
Compare commits
41 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
579146b501 | ||
|
|
deb4dbce1c | ||
|
|
8eda8decea | ||
|
|
f6935935c0 | ||
|
|
d3fa7e1cad | ||
|
|
83d9abd036 | ||
|
|
222e9117b4 | ||
|
|
eda2207e6c | ||
|
|
a91e5f2775 | ||
|
|
cba4f2ccc1 | ||
|
|
8f1a436ba1 | ||
|
|
9941348ec6 | ||
|
|
afb71dc762 | ||
|
|
0d96b12566 | ||
|
|
9d2ce4bcd4 | ||
|
|
a3e280c2b9 | ||
|
|
7c7f4f4e54 | ||
|
|
56b3e2cdc8 | ||
|
|
97f042c6ba | ||
|
|
b1aa7c6a79 | ||
|
|
235fda3fbb | ||
|
|
d8d2e596a5 | ||
|
|
e05ce5ae31 | ||
|
|
25e4ea9c24 | ||
|
|
9f88fe16a3 | ||
|
|
ba0e7a2e9d | ||
|
|
dc80152ff8 | ||
|
|
9e3154d167 | ||
|
|
065da34272 | ||
|
|
f89809de9a | ||
|
|
332c57ebcb | ||
|
|
acb173b703 | ||
|
|
47e78b37fe | ||
|
|
fc44af6165 | ||
|
|
73439d5863 | ||
|
|
085aad3038 | ||
|
|
7db500bfbc | ||
|
|
e689f3dce6 | ||
|
|
b076cdc12a | ||
|
|
0dea94c801 | ||
|
|
6cf487aee2 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -9,3 +9,4 @@
|
||||
/dist/
|
||||
parso.egg-info/
|
||||
/.cache/
|
||||
/.pytest_cache
|
||||
|
||||
@@ -7,7 +7,7 @@ python:
|
||||
- 3.4
|
||||
- 3.5
|
||||
- 3.6
|
||||
- 3.7
|
||||
- 3.7-dev
|
||||
- pypy
|
||||
matrix:
|
||||
allow_failures:
|
||||
|
||||
@@ -3,6 +3,11 @@
|
||||
Changelog
|
||||
---------
|
||||
|
||||
0.2.0 (2018-04-15)
|
||||
+++++++++++++++++++
|
||||
|
||||
- f-strings are now parsed as a part of the normal Python grammar. This makes
|
||||
it way easier to deal with them.
|
||||
|
||||
0.1.1 (2017-11-05)
|
||||
+++++++++++++++++++
|
||||
|
||||
@@ -36,7 +36,7 @@ if [[ $tag_ref ]]; then
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
git tag $tag
|
||||
git tag -a $tag
|
||||
git push --tags
|
||||
fi
|
||||
|
||||
|
||||
1
docs/_themes/flask/layout.html
vendored
1
docs/_themes/flask/layout.html
vendored
@@ -19,7 +19,6 @@
|
||||
{% endblock %}
|
||||
{%- block footer %}
|
||||
<div class="footer">
|
||||
© Copyright {{ copyright }}.
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a>.
|
||||
</div>
|
||||
{% if pagename == 'index' %}
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
|
||||
import sys
|
||||
import os
|
||||
import datetime
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
@@ -45,7 +44,7 @@ master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = u'parso'
|
||||
copyright = u'2012 - {today.year}, parso contributors'.format(today=datetime.date.today())
|
||||
copyright = u'parso contributors'
|
||||
|
||||
import parso
|
||||
from parso.utils import version_info
|
||||
|
||||
@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
|
||||
from parso.utils import split_lines, python_bytes_to_unicode
|
||||
|
||||
|
||||
__version__ = '0.1.1'
|
||||
__version__ = '0.2.0'
|
||||
|
||||
|
||||
def parse(code=None, **kwargs):
|
||||
|
||||
@@ -36,7 +36,7 @@ except AttributeError:
|
||||
def u(string):
|
||||
"""Cast to unicode DAMMIT!
|
||||
Written because Python2 repr always implicitly casts to a string, so we
|
||||
have to cast back to a unicode (and we now that we always deal with valid
|
||||
have to cast back to a unicode (and we know that we always deal with valid
|
||||
unicode, because we check that in the beginning).
|
||||
"""
|
||||
if py_version >= 30:
|
||||
|
||||
@@ -12,7 +12,6 @@ from parso.parser import BaseParser
|
||||
from parso.python.parser import Parser as PythonParser
|
||||
from parso.python.errors import ErrorFinderConfig
|
||||
from parso.python import pep8
|
||||
from parso.python import fstring
|
||||
|
||||
_loaded_grammars = {}
|
||||
|
||||
@@ -73,7 +72,7 @@ class Grammar(object):
|
||||
:py:class:`parso.python.tree.Module`.
|
||||
"""
|
||||
if 'start_pos' in kwargs:
|
||||
raise TypeError("parse() got an unexpected keyworda argument.")
|
||||
raise TypeError("parse() got an unexpected keyword argument.")
|
||||
return self._parse(code=code, **kwargs)
|
||||
|
||||
def _parse(self, code=None, error_recovery=True, path=None,
|
||||
@@ -186,7 +185,6 @@ class Grammar(object):
|
||||
normalizer.walk(node)
|
||||
return normalizer.issues
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
labels = self._pgen_grammar.number2symbol.values()
|
||||
txt = ' '.join(list(labels)[:3]) + ' ...'
|
||||
@@ -215,34 +213,6 @@ class PythonGrammar(Grammar):
|
||||
return tokenize(code, self.version_info)
|
||||
|
||||
|
||||
class PythonFStringGrammar(Grammar):
|
||||
_token_namespace = fstring.TokenNamespace
|
||||
_start_symbol = 'fstring'
|
||||
|
||||
def __init__(self):
|
||||
super(PythonFStringGrammar, self).__init__(
|
||||
text=fstring.GRAMMAR,
|
||||
tokenizer=fstring.tokenize,
|
||||
parser=fstring.Parser
|
||||
)
|
||||
|
||||
def parse(self, code, **kwargs):
|
||||
return self._parse(code, **kwargs)
|
||||
|
||||
def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
|
||||
tokens = self._tokenizer(code, start_pos=start_pos)
|
||||
p = self._parser(
|
||||
self._pgen_grammar,
|
||||
error_recovery=error_recovery,
|
||||
start_symbol=self._start_symbol,
|
||||
)
|
||||
return p.parse(tokens=tokens)
|
||||
|
||||
def parse_leaf(self, leaf, error_recovery=True):
|
||||
code = leaf._get_payload()
|
||||
return self.parse(code, error_recovery=True, start_pos=leaf.start_pos)
|
||||
|
||||
|
||||
def load_grammar(**kwargs):
|
||||
"""
|
||||
Loads a :py:class:`parso.Grammar`. The default version is the current Python
|
||||
@@ -273,10 +243,6 @@ def load_grammar(**kwargs):
|
||||
except FileNotFoundError:
|
||||
message = "Python version %s is currently not supported." % version
|
||||
raise NotImplementedError(message)
|
||||
elif language == 'python-f-string':
|
||||
if version is not None:
|
||||
raise NotImplementedError("Currently different versions are not supported.")
|
||||
return PythonFStringGrammar()
|
||||
else:
|
||||
raise NotImplementedError("No support for language %s." % language)
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ class ParserGenerator(object):
|
||||
c = grammar.Grammar(self._bnf_text)
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
# TODO do we still need this?
|
||||
names.remove(self.startsymbol)
|
||||
names.insert(0, self.startsymbol)
|
||||
for name in names:
|
||||
@@ -316,8 +317,8 @@ class ParserGenerator(object):
|
||||
|
||||
def _expect(self, type):
|
||||
if self.type != type:
|
||||
self._raise_error("expected %s, got %s(%s)",
|
||||
type, self.type, self.value)
|
||||
self._raise_error("expected %s(%s), got %s(%s)",
|
||||
type, token.tok_name[type], self.type, self.value)
|
||||
value = self.value
|
||||
self._gettoken()
|
||||
return value
|
||||
|
||||
@@ -133,7 +133,7 @@ class DiffParser(object):
|
||||
LOG.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
|
||||
|
||||
for operation, i1, i2, j1, j2 in opcodes:
|
||||
LOG.debug('diff %s old[%s:%s] new[%s:%s]',
|
||||
LOG.debug('diff code[%s] old[%s:%s] new[%s:%s]',
|
||||
operation, i1 + 1, i2, j1 + 1, j2)
|
||||
|
||||
if j2 == line_length and new_lines[-1] == '':
|
||||
@@ -454,7 +454,7 @@ class _NodesStack(object):
|
||||
self._last_prefix = ''
|
||||
if is_endmarker:
|
||||
try:
|
||||
separation = last_leaf.prefix.rindex('\n')
|
||||
separation = last_leaf.prefix.rindex('\n') + 1
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
@@ -462,7 +462,7 @@ class _NodesStack(object):
|
||||
# That is not relevant if parentheses were opened. Always parse
|
||||
# until the end of a line.
|
||||
last_leaf.prefix, self._last_prefix = \
|
||||
last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
|
||||
last_leaf.prefix[:separation], last_leaf.prefix[separation:]
|
||||
|
||||
first_leaf = tree_nodes[0].get_first_leaf()
|
||||
first_leaf.prefix = self.prefix + first_leaf.prefix
|
||||
@@ -472,7 +472,6 @@ class _NodesStack(object):
|
||||
self.prefix = last_leaf.prefix
|
||||
|
||||
tree_nodes = tree_nodes[:-1]
|
||||
|
||||
return tree_nodes
|
||||
|
||||
def copy_nodes(self, tree_nodes, until_line, line_offset):
|
||||
@@ -492,6 +491,13 @@ class _NodesStack(object):
|
||||
new_tos = tos
|
||||
for node in nodes:
|
||||
if node.type == 'endmarker':
|
||||
# We basically removed the endmarker, but we are not allowed to
|
||||
# remove the newline at the end of the line, otherwise it's
|
||||
# going to be missing.
|
||||
try:
|
||||
self.prefix = node.prefix[:node.prefix.rindex('\n') + 1]
|
||||
except ValueError:
|
||||
pass
|
||||
# Endmarkers just distort all the checks below. Remove them.
|
||||
break
|
||||
|
||||
|
||||
@@ -563,7 +563,8 @@ class _ReturnAndYieldChecks(SyntaxRule):
|
||||
and self._normalizer.version == (3, 5):
|
||||
self.add_issue(self.get_node(leaf), message=self.message_async_yield)
|
||||
|
||||
@ErrorFinder.register_rule(type='atom')
|
||||
|
||||
@ErrorFinder.register_rule(type='strings')
|
||||
class _BytesAndStringMix(SyntaxRule):
|
||||
# e.g. 's' b''
|
||||
message = "cannot mix bytes and nonbytes literals"
|
||||
@@ -744,7 +745,12 @@ class _NonlocalModuleLevelRule(SyntaxRule):
|
||||
|
||||
@ErrorFinder.register_rule(type='arglist')
|
||||
class _ArglistRule(SyntaxRule):
|
||||
message = "Generator expression must be parenthesized if not sole argument"
|
||||
@property
|
||||
def message(self):
|
||||
if self._normalizer.version < (3, 7):
|
||||
return "Generator expression must be parenthesized if not sole argument"
|
||||
else:
|
||||
return "Generator expression must be parenthesized"
|
||||
|
||||
def is_issue(self, node):
|
||||
first_arg = node.children[0]
|
||||
@@ -837,101 +843,36 @@ class _TryStmtRule(SyntaxRule):
|
||||
self.add_issue(default_except, message=self.message)
|
||||
|
||||
|
||||
@ErrorFinder.register_rule(type='string')
|
||||
@ErrorFinder.register_rule(type='fstring')
|
||||
class _FStringRule(SyntaxRule):
|
||||
_fstring_grammar = None
|
||||
message_empty = "f-string: empty expression not allowed" # f'{}'
|
||||
message_single_closing = "f-string: single '}' is not allowed" # f'}'
|
||||
message_nested = "f-string: expressions nested too deeply"
|
||||
message_backslash = "f-string expression part cannot include a backslash" # f'{"\"}' or f'{"\\"}'
|
||||
message_comment = "f-string expression part cannot include '#'" # f'{#}'
|
||||
message_unterminated_string = "f-string: unterminated string" # f'{"}'
|
||||
message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
|
||||
message_incomplete = "f-string: expecting '}'" # f'{'
|
||||
message_syntax = "invalid syntax"
|
||||
|
||||
@classmethod
|
||||
def _load_grammar(cls):
|
||||
import parso
|
||||
def _check_format_spec(self, format_spec, depth):
|
||||
self._check_fstring_contents(format_spec.children[1:], depth)
|
||||
|
||||
if cls._fstring_grammar is None:
|
||||
cls._fstring_grammar = parso.load_grammar(language='python-f-string')
|
||||
return cls._fstring_grammar
|
||||
def _check_fstring_expr(self, fstring_expr, depth):
|
||||
if depth >= 2:
|
||||
self.add_issue(fstring_expr, message=self.message_nested)
|
||||
|
||||
conversion = fstring_expr.children[2]
|
||||
if conversion.type == 'fstring_conversion':
|
||||
name = conversion.children[1]
|
||||
if name.value not in ('s', 'r', 'a'):
|
||||
self.add_issue(name, message=self.message_conversion)
|
||||
|
||||
format_spec = fstring_expr.children[-2]
|
||||
if format_spec.type == 'fstring_format_spec':
|
||||
self._check_format_spec(format_spec, depth + 1)
|
||||
|
||||
def is_issue(self, fstring):
|
||||
if 'f' not in fstring.string_prefix.lower():
|
||||
return
|
||||
self._check_fstring_contents(fstring.children[1:-1])
|
||||
|
||||
parsed = self._load_grammar().parse_leaf(fstring)
|
||||
for child in parsed.children:
|
||||
if child.type == 'expression':
|
||||
self._check_expression(child)
|
||||
elif child.type == 'error_node':
|
||||
next_ = child.get_next_leaf()
|
||||
if next_.type == 'error_leaf' and next_.original_type == 'unterminated_string':
|
||||
self.add_issue(next_, message=self.message_unterminated_string)
|
||||
# At this point nothing more is comming except the error
|
||||
# leaf that we've already checked here.
|
||||
break
|
||||
self.add_issue(child, message=self.message_incomplete)
|
||||
elif child.type == 'error_leaf':
|
||||
self.add_issue(child, message=self.message_single_closing)
|
||||
|
||||
def _check_python_expr(self, python_expr):
|
||||
value = python_expr.value
|
||||
if '\\' in value:
|
||||
self.add_issue(python_expr, message=self.message_backslash)
|
||||
return
|
||||
if '#' in value:
|
||||
self.add_issue(python_expr, message=self.message_comment)
|
||||
return
|
||||
if re.match('\s*$', value) is not None:
|
||||
self.add_issue(python_expr, message=self.message_empty)
|
||||
return
|
||||
|
||||
# This is now nested parsing. We parsed the fstring and now
|
||||
# we're parsing Python again.
|
||||
try:
|
||||
# CPython has a bit of a special ways to parse Python code within
|
||||
# f-strings. It wraps the code in brackets to make sure that
|
||||
# whitespace doesn't make problems (indentation/newlines).
|
||||
# Just use that algorithm as well here and adapt start positions.
|
||||
start_pos = python_expr.start_pos
|
||||
start_pos = start_pos[0], start_pos[1] - 1
|
||||
eval_input = self._normalizer.grammar._parse(
|
||||
'(%s)' % value,
|
||||
start_symbol='eval_input',
|
||||
start_pos=start_pos,
|
||||
error_recovery=False
|
||||
)
|
||||
except ParserSyntaxError as e:
|
||||
self.add_issue(e.error_leaf, message=self.message_syntax)
|
||||
return
|
||||
|
||||
issues = self._normalizer.grammar.iter_errors(eval_input)
|
||||
self._normalizer.issues += issues
|
||||
|
||||
def _check_format_spec(self, format_spec):
|
||||
for expression in format_spec.children[1:]:
|
||||
nested_format_spec = expression.children[-2]
|
||||
if nested_format_spec.type == 'format_spec':
|
||||
if len(nested_format_spec.children) > 1:
|
||||
self.add_issue(
|
||||
nested_format_spec.children[1],
|
||||
message=self.message_nested
|
||||
)
|
||||
|
||||
self._check_expression(expression)
|
||||
|
||||
def _check_expression(self, expression):
|
||||
for c in expression.children:
|
||||
if c.type == 'python_expr':
|
||||
self._check_python_expr(c)
|
||||
elif c.type == 'conversion':
|
||||
if c.value not in ('s', 'r', 'a'):
|
||||
self.add_issue(c, message=self.message_conversion)
|
||||
elif c.type == 'format_spec':
|
||||
self._check_format_spec(c)
|
||||
def _check_fstring_contents(self, children, depth=0):
|
||||
for fstring_content in children:
|
||||
if fstring_content.type == 'fstring_expr':
|
||||
self._check_fstring_expr(fstring_content, depth)
|
||||
|
||||
|
||||
class _CheckAssignmentRule(SyntaxRule):
|
||||
@@ -944,7 +885,7 @@ class _CheckAssignmentRule(SyntaxRule):
|
||||
first, second = node.children[:2]
|
||||
error = _get_comprehension_type(node)
|
||||
if error is None:
|
||||
if second.type in ('dictorsetmaker', 'string'):
|
||||
if second.type == 'dictorsetmaker':
|
||||
error = 'literal'
|
||||
elif first in ('(', '['):
|
||||
if second.type == 'yield_expr':
|
||||
@@ -963,7 +904,7 @@ class _CheckAssignmentRule(SyntaxRule):
|
||||
error = 'Ellipsis'
|
||||
elif type_ == 'comparison':
|
||||
error = 'comparison'
|
||||
elif type_ in ('string', 'number'):
|
||||
elif type_ in ('string', 'number', 'strings'):
|
||||
error = 'literal'
|
||||
elif type_ == 'yield_expr':
|
||||
# This one seems to be a slightly different warning in Python.
|
||||
|
||||
@@ -1,211 +0,0 @@
|
||||
import re
|
||||
|
||||
from itertools import count
|
||||
from parso.utils import PythonVersionInfo
|
||||
from parso.utils import split_lines
|
||||
from parso.python.tokenize import Token
|
||||
from parso import parser
|
||||
from parso.tree import TypedLeaf, ErrorNode, ErrorLeaf
|
||||
|
||||
version36 = PythonVersionInfo(3, 6)
|
||||
|
||||
|
||||
class TokenNamespace:
|
||||
_c = count()
|
||||
LBRACE = next(_c)
|
||||
RBRACE = next(_c)
|
||||
ENDMARKER = next(_c)
|
||||
COLON = next(_c)
|
||||
CONVERSION = next(_c)
|
||||
PYTHON_EXPR = next(_c)
|
||||
EXCLAMATION_MARK = next(_c)
|
||||
UNTERMINATED_STRING = next(_c)
|
||||
|
||||
token_map = dict((v, k) for k, v in locals().items() if not k.startswith('_'))
|
||||
|
||||
@classmethod
|
||||
def generate_token_id(cls, string):
|
||||
if string == '{':
|
||||
return cls.LBRACE
|
||||
elif string == '}':
|
||||
return cls.RBRACE
|
||||
elif string == '!':
|
||||
return cls.EXCLAMATION_MARK
|
||||
elif string == ':':
|
||||
return cls.COLON
|
||||
return getattr(cls, string)
|
||||
|
||||
|
||||
GRAMMAR = """
|
||||
fstring: expression* ENDMARKER
|
||||
format_spec: ':' expression*
|
||||
expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ format_spec ] '}'
|
||||
"""
|
||||
|
||||
_prefix = r'((?:[^{}]+)*)'
|
||||
_expr = _prefix + r'(\{|\}|$)'
|
||||
_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
|
||||
# There's only one conversion character allowed. But the rules have to be
|
||||
# checked later anyway, so allow more here. This makes error recovery nicer.
|
||||
_conversion = r'([^={}:]*)(.?)'
|
||||
|
||||
_compiled_expr = re.compile(_expr)
|
||||
_compiled_in_expr = re.compile(_in_expr)
|
||||
_compiled_conversion = re.compile(_conversion)
|
||||
|
||||
|
||||
def tokenize(code, start_pos=(1, 0)):
|
||||
def add_to_pos(string):
|
||||
lines = split_lines(string)
|
||||
l = len(lines[-1])
|
||||
if len(lines) > 1:
|
||||
start_pos[0] += len(lines) - 1
|
||||
start_pos[1] = l
|
||||
else:
|
||||
start_pos[1] += l
|
||||
|
||||
def tok(value, type=None, prefix=''):
|
||||
if type is None:
|
||||
type = TokenNamespace.generate_token_id(value)
|
||||
|
||||
add_to_pos(prefix)
|
||||
token = Token(type, value, tuple(start_pos), prefix)
|
||||
add_to_pos(value)
|
||||
return token
|
||||
|
||||
start = 0
|
||||
recursion_level = 0
|
||||
added_prefix = ''
|
||||
start_pos = list(start_pos)
|
||||
while True:
|
||||
match = _compiled_expr.match(code, start)
|
||||
prefix = added_prefix + match.group(1)
|
||||
found = match.group(2)
|
||||
start = match.end()
|
||||
if not found:
|
||||
# We're at the end.
|
||||
break
|
||||
|
||||
if found == '}':
|
||||
if recursion_level == 0 and len(code) > start and code[start] == '}':
|
||||
# This is a }} escape.
|
||||
added_prefix = prefix + '}}'
|
||||
start += 1
|
||||
continue
|
||||
|
||||
recursion_level = max(0, recursion_level - 1)
|
||||
yield tok(found, prefix=prefix)
|
||||
added_prefix = ''
|
||||
else:
|
||||
assert found == '{'
|
||||
if recursion_level == 0 and len(code) > start and code[start] == '{':
|
||||
# This is a {{ escape.
|
||||
added_prefix = prefix + '{{'
|
||||
start += 1
|
||||
continue
|
||||
|
||||
recursion_level += 1
|
||||
yield tok(found, prefix=prefix)
|
||||
added_prefix = ''
|
||||
|
||||
expression = ''
|
||||
squared_count = 0
|
||||
curly_count = 0
|
||||
while True:
|
||||
expr_match = _compiled_in_expr.match(code, start)
|
||||
expression += expr_match.group(1)
|
||||
found = expr_match.group(2)
|
||||
start = expr_match.end()
|
||||
|
||||
if found == '{':
|
||||
curly_count += 1
|
||||
expression += found
|
||||
elif found == '}' and curly_count > 0:
|
||||
curly_count -= 1
|
||||
expression += found
|
||||
elif found == '[':
|
||||
squared_count += 1
|
||||
expression += found
|
||||
elif found == ']':
|
||||
# Use a max function here, because the Python code might
|
||||
# just have syntax errors.
|
||||
squared_count = max(0, squared_count - 1)
|
||||
expression += found
|
||||
elif found == ':' and (squared_count or curly_count):
|
||||
expression += found
|
||||
elif found in ('"', "'"):
|
||||
search = found
|
||||
if len(code) > start + 1 and \
|
||||
code[start] == found == code[start+1]:
|
||||
search *= 3
|
||||
start += 2
|
||||
|
||||
index = code.find(search, start)
|
||||
if index == -1:
|
||||
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
|
||||
yield tok(
|
||||
found + code[start:],
|
||||
type=TokenNamespace.UNTERMINATED_STRING,
|
||||
)
|
||||
start = len(code)
|
||||
break
|
||||
expression += found + code[start:index+1]
|
||||
start = index + 1
|
||||
elif found == '!' and len(code) > start and code[start] == '=':
|
||||
# This is a python `!=` and not a conversion.
|
||||
expression += found
|
||||
else:
|
||||
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
|
||||
if found:
|
||||
yield tok(found)
|
||||
break
|
||||
|
||||
if found == '!':
|
||||
conversion_match = _compiled_conversion.match(code, start)
|
||||
found = conversion_match.group(2)
|
||||
start = conversion_match.end()
|
||||
yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
|
||||
if found:
|
||||
yield tok(found)
|
||||
if found == '}':
|
||||
recursion_level -= 1
|
||||
|
||||
# We don't need to handle everything after ':', because that is
|
||||
# basically new tokens.
|
||||
|
||||
yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
|
||||
|
||||
|
||||
class Parser(parser.BaseParser):
|
||||
def parse(self, tokens):
|
||||
node = super(Parser, self).parse(tokens)
|
||||
if isinstance(node, self.default_leaf): # Is an endmarker.
|
||||
# If there's no curly braces we get back a non-module. We always
|
||||
# want an fstring.
|
||||
node = self.default_node('fstring', [node])
|
||||
|
||||
return node
|
||||
|
||||
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
|
||||
# TODO this is so ugly.
|
||||
leaf_type = TokenNamespace.token_map[type].lower()
|
||||
return TypedLeaf(leaf_type, value, start_pos, prefix)
|
||||
|
||||
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||
add_token_callback):
|
||||
if not self._error_recovery:
|
||||
return super(Parser, self).error_recovery(
|
||||
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||
add_token_callback
|
||||
)
|
||||
|
||||
token_type = TokenNamespace.token_map[typ].lower()
|
||||
if len(stack) == 1:
|
||||
error_leaf = ErrorLeaf(token_type, value, start_pos, prefix)
|
||||
stack[0][2][1].append(error_leaf)
|
||||
else:
|
||||
dfa, state, (type_, nodes) = stack[1]
|
||||
stack[0][2][1].append(ErrorNode(nodes))
|
||||
stack[1:] = []
|
||||
|
||||
add_token_callback(typ, value, start_pos, prefix)
|
||||
@@ -119,7 +119,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [listmaker] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
'`' testlist1 '`' |
|
||||
NAME | NUMBER | STRING+)
|
||||
NAME | NUMBER | strings)
|
||||
strings: STRING+
|
||||
listmaker: test ( list_for | (',' test)* [','] )
|
||||
# Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the
|
||||
# default. It's more consistent like this.
|
||||
|
||||
@@ -104,7 +104,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [listmaker] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
'`' testlist1 '`' |
|
||||
NAME | NUMBER | STRING+)
|
||||
NAME | NUMBER | strings)
|
||||
strings: STRING+
|
||||
listmaker: test ( list_for | (',' test)* [','] )
|
||||
testlist_comp: test ( comp_for | (',' test)* [','] )
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
|
||||
@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
strings: STRING+
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
|
||||
@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
strings: STRING+
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
|
||||
@@ -110,7 +110,8 @@ atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
strings: STRING+
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
|
||||
@@ -108,7 +108,7 @@ atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
@@ -148,3 +148,10 @@ encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist
|
||||
|
||||
strings: (STRING | fstring)+
|
||||
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||
fstring_content: FSTRING_STRING | fstring_expr
|
||||
fstring_conversion: '!' NAME
|
||||
fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||
fstring_format_spec: ':' fstring_content*
|
||||
|
||||
@@ -108,7 +108,7 @@ atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
@@ -148,3 +148,10 @@ encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist
|
||||
|
||||
strings: (STRING | fstring)+
|
||||
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||
fstring_content: FSTRING_STRING | fstring_expr
|
||||
fstring_conversion: '!' NAME
|
||||
fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||
fstring_format_spec: ':' fstring_content*
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from parso.python import tree
|
||||
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
|
||||
STRING, tok_name, NAME)
|
||||
STRING, tok_name, NAME, FSTRING_STRING,
|
||||
FSTRING_START, FSTRING_END)
|
||||
from parso.parser import BaseParser
|
||||
from parso.pgen2.parse import token_to_ilabel
|
||||
|
||||
@@ -50,6 +51,17 @@ class Parser(BaseParser):
|
||||
}
|
||||
default_node = tree.PythonNode
|
||||
|
||||
# Names/Keywords are handled separately
|
||||
_leaf_map = {
|
||||
STRING: tree.String,
|
||||
NUMBER: tree.Number,
|
||||
NEWLINE: tree.Newline,
|
||||
ENDMARKER: tree.EndMarker,
|
||||
FSTRING_STRING: tree.FStringString,
|
||||
FSTRING_START: tree.FStringStart,
|
||||
FSTRING_END: tree.FStringEnd,
|
||||
}
|
||||
|
||||
def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
|
||||
super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
|
||||
|
||||
@@ -121,16 +133,8 @@ class Parser(BaseParser):
|
||||
return tree.Keyword(value, start_pos, prefix)
|
||||
else:
|
||||
return tree.Name(value, start_pos, prefix)
|
||||
elif type == STRING:
|
||||
return tree.String(value, start_pos, prefix)
|
||||
elif type == NUMBER:
|
||||
return tree.Number(value, start_pos, prefix)
|
||||
elif type == NEWLINE:
|
||||
return tree.Newline(value, start_pos, prefix)
|
||||
elif type == ENDMARKER:
|
||||
return tree.EndMarker(value, start_pos, prefix)
|
||||
else:
|
||||
return tree.Operator(value, start_pos, prefix)
|
||||
|
||||
return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
|
||||
|
||||
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||
add_token_callback):
|
||||
|
||||
@@ -32,6 +32,14 @@ if py_version < 35:
|
||||
ERROR_DEDENT = next(_counter)
|
||||
tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
|
||||
|
||||
FSTRING_START = next(_counter)
|
||||
tok_name[FSTRING_START] = 'FSTRING_START'
|
||||
FSTRING_END = next(_counter)
|
||||
tok_name[FSTRING_END] = 'FSTRING_END'
|
||||
FSTRING_STRING = next(_counter)
|
||||
tok_name[FSTRING_STRING] = 'FSTRING_STRING'
|
||||
EXCLAMATION = next(_counter)
|
||||
tok_name[EXCLAMATION] = 'EXCLAMATION'
|
||||
|
||||
# Map from operator to number (since tokenize doesn't do this)
|
||||
|
||||
@@ -84,6 +92,7 @@ opmap_raw = """\
|
||||
//= DOUBLESLASHEQUAL
|
||||
-> RARROW
|
||||
... ELLIPSIS
|
||||
! EXCLAMATION
|
||||
"""
|
||||
|
||||
opmap = {}
|
||||
|
||||
@@ -20,14 +20,15 @@ from codecs import BOM_UTF8
|
||||
|
||||
from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
|
||||
NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
|
||||
ERROR_DEDENT)
|
||||
ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
|
||||
FSTRING_END)
|
||||
from parso._compatibility import py_version
|
||||
from parso.utils import split_lines
|
||||
|
||||
|
||||
TokenCollection = namedtuple(
|
||||
'TokenCollection',
|
||||
'pseudo_token single_quoted triple_quoted endpats always_break_tokens',
|
||||
'pseudo_token single_quoted triple_quoted endpats fstring_pattern_map always_break_tokens',
|
||||
)
|
||||
|
||||
BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
|
||||
@@ -52,32 +53,35 @@ def group(*choices, **kwargs):
|
||||
return start + '|'.join(choices) + ')'
|
||||
|
||||
|
||||
def any(*choices):
|
||||
return group(*choices) + '*'
|
||||
|
||||
|
||||
def maybe(*choices):
|
||||
return group(*choices) + '?'
|
||||
|
||||
|
||||
# Return the empty string, plus all of the valid string prefixes.
|
||||
def _all_string_prefixes(version_info):
|
||||
def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False):
|
||||
def different_case_versions(prefix):
|
||||
for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
|
||||
yield ''.join(s)
|
||||
# The valid string prefixes. Only contain the lower case versions,
|
||||
# and don't contain any permuations (include 'fr', but not
|
||||
# 'rf'). The various permutations will be generated.
|
||||
_valid_string_prefixes = ['b', 'r', 'u']
|
||||
valid_string_prefixes = ['b', 'r', 'u']
|
||||
if version_info >= (3, 0):
|
||||
_valid_string_prefixes.append('br')
|
||||
valid_string_prefixes.append('br')
|
||||
|
||||
if version_info >= (3, 6):
|
||||
_valid_string_prefixes += ['f', 'fr']
|
||||
result = set([''])
|
||||
if version_info >= (3, 6) and include_fstring:
|
||||
f = ['f', 'fr']
|
||||
if only_fstring:
|
||||
valid_string_prefixes = f
|
||||
result = set()
|
||||
else:
|
||||
valid_string_prefixes += f
|
||||
elif only_fstring:
|
||||
return set()
|
||||
|
||||
# if we add binary f-strings, add: ['fb', 'fbr']
|
||||
result = set([''])
|
||||
for prefix in _valid_string_prefixes:
|
||||
for prefix in valid_string_prefixes:
|
||||
for t in _itertools.permutations(prefix):
|
||||
# create a list with upper and lower versions of each
|
||||
# character
|
||||
@@ -102,6 +106,10 @@ def _get_token_collection(version_info):
|
||||
return result
|
||||
|
||||
|
||||
fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
|
||||
fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
|
||||
|
||||
|
||||
def _create_token_collection(version_info):
|
||||
# Note: we use unicode matching for names ("\w") but ascii matching for
|
||||
# number literals.
|
||||
@@ -141,6 +149,9 @@ def _create_token_collection(version_info):
|
||||
# StringPrefix can be the empty string (making it optional).
|
||||
possible_prefixes = _all_string_prefixes(version_info)
|
||||
StringPrefix = group(*possible_prefixes)
|
||||
StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True))
|
||||
fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True)
|
||||
FStringStart = group(*fstring_prefixes)
|
||||
|
||||
# Tail end of ' string.
|
||||
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
|
||||
@@ -150,14 +161,14 @@ def _create_token_collection(version_info):
|
||||
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
|
||||
# Tail end of """ string.
|
||||
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
|
||||
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
|
||||
Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')
|
||||
|
||||
# Because of leftmost-then-longest match semantics, be sure to put the
|
||||
# longest operators first (e.g., if = came before ==, == would get
|
||||
# recognized as two instances of =).
|
||||
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
|
||||
Operator = group(r"\*\*=?", r">>=?", r"<<=?",
|
||||
r"//=?", r"->",
|
||||
r"[+\-*/%&@`|^=<>]=?",
|
||||
r"[+\-*/%&@`|^!=<>]=?",
|
||||
r"~")
|
||||
|
||||
Bracket = '[][(){}]'
|
||||
@@ -174,7 +185,12 @@ def _create_token_collection(version_info):
|
||||
group("'", r'\\\r?\n'),
|
||||
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
|
||||
group('"', r'\\\r?\n'))
|
||||
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
|
||||
pseudo_extra_pool = [Comment, Triple]
|
||||
all_quotes = '"', "'", '"""', "'''"
|
||||
if fstring_prefixes:
|
||||
pseudo_extra_pool.append(FStringStart + group(*all_quotes))
|
||||
|
||||
PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool)
|
||||
PseudoToken = group(Whitespace, capture=True) + \
|
||||
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
|
||||
|
||||
@@ -192,18 +208,24 @@ def _create_token_collection(version_info):
|
||||
# including the opening quotes.
|
||||
single_quoted = set()
|
||||
triple_quoted = set()
|
||||
fstring_pattern_map = {}
|
||||
for t in possible_prefixes:
|
||||
for p in (t + '"', t + "'"):
|
||||
single_quoted.add(p)
|
||||
for p in (t + '"""', t + "'''"):
|
||||
triple_quoted.add(p)
|
||||
for quote in '"', "'":
|
||||
single_quoted.add(t + quote)
|
||||
|
||||
for quote in '"""', "'''":
|
||||
triple_quoted.add(t + quote)
|
||||
|
||||
for t in fstring_prefixes:
|
||||
for quote in all_quotes:
|
||||
fstring_pattern_map[t + quote] = quote
|
||||
|
||||
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
|
||||
'finally', 'while', 'with', 'return')
|
||||
pseudo_token_compiled = _compile(PseudoToken)
|
||||
return TokenCollection(
|
||||
pseudo_token_compiled, single_quoted, triple_quoted, endpats,
|
||||
ALWAYS_BREAK_TOKENS
|
||||
fstring_pattern_map, ALWAYS_BREAK_TOKENS
|
||||
)
|
||||
|
||||
|
||||
@@ -226,12 +248,104 @@ class PythonToken(Token):
|
||||
self._replace(type=self._get_type_name()))
|
||||
|
||||
|
||||
class FStringNode(object):
|
||||
def __init__(self, quote):
|
||||
self.quote = quote
|
||||
self.parentheses_count = 0
|
||||
self.previous_lines = ''
|
||||
self.last_string_start_pos = None
|
||||
# In the syntax there can be multiple format_spec's nested:
|
||||
# {x:{y:3}}
|
||||
self.format_spec_count = 0
|
||||
|
||||
def open_parentheses(self, character):
|
||||
self.parentheses_count += 1
|
||||
|
||||
def close_parentheses(self, character):
|
||||
self.parentheses_count -= 1
|
||||
|
||||
def allow_multiline(self):
|
||||
return len(self.quote) == 3
|
||||
|
||||
def is_in_expr(self):
|
||||
return (self.parentheses_count - self.format_spec_count) > 0
|
||||
|
||||
|
||||
def _check_fstring_ending(fstring_stack, token, from_start=False):
|
||||
fstring_end = float('inf')
|
||||
fstring_index = None
|
||||
for i, node in enumerate(fstring_stack):
|
||||
if from_start:
|
||||
if token.startswith(node.quote):
|
||||
fstring_index = i
|
||||
fstring_end = len(node.quote)
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
end = token.index(node.quote)
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
if fstring_index is None or end < fstring_end:
|
||||
fstring_index = i
|
||||
fstring_end = end
|
||||
return fstring_index, fstring_end
|
||||
|
||||
|
||||
def _find_fstring_string(fstring_stack, line, lnum, pos):
|
||||
tos = fstring_stack[-1]
|
||||
if tos.is_in_expr():
|
||||
return '', pos
|
||||
else:
|
||||
new_pos = pos
|
||||
allow_multiline = tos.allow_multiline()
|
||||
if allow_multiline:
|
||||
match = fstring_string_multi_line.match(line, pos)
|
||||
else:
|
||||
match = fstring_string_single_line.match(line, pos)
|
||||
if match is None:
|
||||
string = tos.previous_lines
|
||||
else:
|
||||
if not tos.previous_lines:
|
||||
tos.last_string_start_pos = (lnum, pos)
|
||||
|
||||
string = match.group(0)
|
||||
for fstring_stack_node in fstring_stack:
|
||||
try:
|
||||
string = string[:string.index(fstring_stack_node.quote)]
|
||||
except ValueError:
|
||||
pass # The string was not found.
|
||||
|
||||
new_pos += len(string)
|
||||
if allow_multiline and string.endswith('\n'):
|
||||
tos.previous_lines += string
|
||||
string = ''
|
||||
else:
|
||||
string = tos.previous_lines + string
|
||||
|
||||
return string, new_pos
|
||||
|
||||
|
||||
def tokenize(code, version_info, start_pos=(1, 0)):
|
||||
"""Generate tokens from a the source code (string)."""
|
||||
lines = split_lines(code, keepends=True)
|
||||
return tokenize_lines(lines, version_info, start_pos=start_pos)
|
||||
|
||||
|
||||
def _print_tokens(func):
|
||||
"""
|
||||
A small helper function to help debug the tokenize_lines function.
|
||||
"""
|
||||
def wrapper(*args, **kwargs):
|
||||
for token in func(*args, **kwargs):
|
||||
print(token)
|
||||
yield token
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
# @_print_tokens
|
||||
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
"""
|
||||
A heavily modified Python standard library tokenizer.
|
||||
@@ -240,7 +354,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
token. This idea comes from lib2to3. The prefix contains all information
|
||||
that is irrelevant for the parser like newlines in parentheses or comments.
|
||||
"""
|
||||
pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
|
||||
pseudo_token, single_quoted, triple_quoted, endpats, fstring_pattern_map, always_break_tokens, = \
|
||||
_get_token_collection(version_info)
|
||||
paren_level = 0 # count parentheses
|
||||
indents = [0]
|
||||
@@ -257,6 +371,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
additional_prefix = ''
|
||||
first = True
|
||||
lnum = start_pos[0] - 1
|
||||
fstring_stack = []
|
||||
for line in lines: # loop over lines in stream
|
||||
lnum += 1
|
||||
pos = 0
|
||||
@@ -287,6 +402,37 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
continue
|
||||
|
||||
while pos < max:
|
||||
if fstring_stack:
|
||||
string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
|
||||
if string:
|
||||
yield PythonToken(
|
||||
FSTRING_STRING, string,
|
||||
fstring_stack[-1].last_string_start_pos,
|
||||
# Never has a prefix because it can start anywhere and
|
||||
# include whitespace.
|
||||
prefix=''
|
||||
)
|
||||
fstring_stack[-1].previous_lines = ''
|
||||
continue
|
||||
|
||||
if pos == max:
|
||||
break
|
||||
|
||||
rest = line[pos:]
|
||||
fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True)
|
||||
|
||||
if fstring_index is not None:
|
||||
yield PythonToken(
|
||||
FSTRING_END,
|
||||
fstring_stack[fstring_index].quote,
|
||||
(lnum, pos),
|
||||
prefix=additional_prefix,
|
||||
)
|
||||
additional_prefix = ''
|
||||
del fstring_stack[fstring_index:]
|
||||
pos += end
|
||||
continue
|
||||
|
||||
pseudomatch = pseudo_token.match(line, pos)
|
||||
if not pseudomatch: # scan for tokens
|
||||
txt = line[pos:]
|
||||
@@ -311,10 +457,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
|
||||
if new_line and initial not in '\r\n#':
|
||||
new_line = False
|
||||
if paren_level == 0:
|
||||
if paren_level == 0 and not fstring_stack:
|
||||
i = 0
|
||||
while line[i] == '\f':
|
||||
i += 1
|
||||
# TODO don't we need to change spos as well?
|
||||
start -= 1
|
||||
if start > indents[-1]:
|
||||
yield PythonToken(INDENT, '', spos, '')
|
||||
@@ -326,11 +473,33 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
yield PythonToken(DEDENT, '', spos, '')
|
||||
indents.pop()
|
||||
|
||||
if fstring_stack:
|
||||
fstring_index, end = _check_fstring_ending(fstring_stack, token)
|
||||
if fstring_index is not None:
|
||||
if end != 0:
|
||||
yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
|
||||
prefix = ''
|
||||
|
||||
yield PythonToken(
|
||||
FSTRING_END,
|
||||
fstring_stack[fstring_index].quote,
|
||||
(lnum, spos[1] + 1),
|
||||
prefix=prefix
|
||||
)
|
||||
del fstring_stack[fstring_index:]
|
||||
pos -= len(token) - end
|
||||
continue
|
||||
|
||||
if (initial in numchars or # ordinary number
|
||||
(initial == '.' and token != '.' and token != '...')):
|
||||
yield PythonToken(NUMBER, token, spos, prefix)
|
||||
elif initial in '\r\n':
|
||||
if not new_line and paren_level == 0:
|
||||
if any(not f.allow_multiline() for f in fstring_stack):
|
||||
# Would use fstring_stack.clear, but that's not available
|
||||
# in Python 2.
|
||||
fstring_stack[:] = []
|
||||
|
||||
if not new_line and paren_level == 0 and not fstring_stack:
|
||||
yield PythonToken(NEWLINE, token, spos, prefix)
|
||||
else:
|
||||
additional_prefix = prefix + token
|
||||
@@ -362,8 +531,12 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
break
|
||||
else: # ordinary string
|
||||
yield PythonToken(STRING, token, spos, prefix)
|
||||
elif token in fstring_pattern_map: # The start of an fstring.
|
||||
fstring_stack.append(FStringNode(fstring_pattern_map[token]))
|
||||
yield PythonToken(FSTRING_START, token, spos, prefix)
|
||||
elif is_identifier(initial): # ordinary name
|
||||
if token in always_break_tokens:
|
||||
fstring_stack[:] = []
|
||||
paren_level = 0
|
||||
while True:
|
||||
indent = indents.pop()
|
||||
@@ -378,9 +551,18 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
break
|
||||
else:
|
||||
if token in '([{':
|
||||
paren_level += 1
|
||||
if fstring_stack:
|
||||
fstring_stack[-1].open_parentheses(token)
|
||||
else:
|
||||
paren_level += 1
|
||||
elif token in ')]}':
|
||||
paren_level -= 1
|
||||
if fstring_stack:
|
||||
fstring_stack[-1].close_parentheses(token)
|
||||
else:
|
||||
paren_level -= 1
|
||||
elif token == ':' and fstring_stack \
|
||||
and fstring_stack[-1].parentheses_count == 1:
|
||||
fstring_stack[-1].format_spec_count += 1
|
||||
|
||||
try:
|
||||
# This check is needed in any case to check if it's a valid
|
||||
|
||||
@@ -262,6 +262,33 @@ class String(Literal):
|
||||
return match.group(2)[:-len(match.group(1))]
|
||||
|
||||
|
||||
class FStringString(Leaf):
|
||||
"""
|
||||
f-strings contain f-string expressions and normal python strings. These are
|
||||
the string parts of f-strings.
|
||||
"""
|
||||
type = 'fstring_string'
|
||||
__slots__ = ()
|
||||
|
||||
|
||||
class FStringStart(Leaf):
|
||||
"""
|
||||
f-strings contain f-string expressions and normal python strings. These are
|
||||
the string parts of f-strings.
|
||||
"""
|
||||
type = 'fstring_start'
|
||||
__slots__ = ()
|
||||
|
||||
|
||||
class FStringEnd(Leaf):
|
||||
"""
|
||||
f-strings contain f-string expressions and normal python strings. These are
|
||||
the string parts of f-strings.
|
||||
"""
|
||||
type = 'fstring_end'
|
||||
__slots__ = ()
|
||||
|
||||
|
||||
class _StringComparisonMixin(object):
|
||||
def __eq__(self, other):
|
||||
"""
|
||||
|
||||
@@ -55,7 +55,6 @@ class NodeOrLeaf(object):
|
||||
Returns the node immediately preceding this node in this parent's
|
||||
children list. If this node does not have a previous sibling, it is
|
||||
None.
|
||||
None.
|
||||
"""
|
||||
# Can't use index(); we need to test by identity
|
||||
for i, child in enumerate(self.parent.children):
|
||||
@@ -339,7 +338,7 @@ class Node(BaseNode):
|
||||
|
||||
class ErrorNode(BaseNode):
|
||||
"""
|
||||
A node that containes valid nodes/leaves that we're follow by a token that
|
||||
A node that contains valid nodes/leaves that we're follow by a token that
|
||||
was invalid. This basically means that the leaf after this node is where
|
||||
Python would mark a syntax error.
|
||||
"""
|
||||
|
||||
@@ -141,7 +141,7 @@ FAILING_EXAMPLES = [
|
||||
|
||||
# f-strings
|
||||
'f"{}"',
|
||||
'f"{\\}"',
|
||||
r'f"{\}"',
|
||||
'f"{\'\\\'}"',
|
||||
'f"{#}"',
|
||||
"f'{1!b}'",
|
||||
|
||||
@@ -484,3 +484,21 @@ def test_indentation_issue(differ):
|
||||
|
||||
differ.initialize(code1)
|
||||
differ.parse(code2, parsers=2)
|
||||
|
||||
|
||||
def test_endmarker_newline(differ):
|
||||
code1 = dedent('''\
|
||||
docu = None
|
||||
# some comment
|
||||
result = codet
|
||||
incomplete_dctassign = {
|
||||
"module"
|
||||
|
||||
if "a":
|
||||
x = 3 # asdf
|
||||
''')
|
||||
|
||||
code2 = code1.replace('codet', 'coded')
|
||||
|
||||
differ.initialize(code1)
|
||||
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
import pytest
|
||||
from textwrap import dedent
|
||||
|
||||
from parso import load_grammar, ParserSyntaxError
|
||||
from parso.python.fstring import tokenize
|
||||
from parso.python.tokenize import tokenize
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def grammar():
|
||||
return load_grammar(language="python-f-string")
|
||||
return load_grammar(version='3.6')
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'code', [
|
||||
'{1}',
|
||||
'{1:}',
|
||||
'',
|
||||
'{1!a}',
|
||||
'{1!a:1}',
|
||||
@@ -26,22 +28,12 @@ def grammar():
|
||||
'{{{1}',
|
||||
'1{{2{{3',
|
||||
'}}',
|
||||
'{:}}}',
|
||||
|
||||
# Invalid, but will be checked, later.
|
||||
'{}',
|
||||
'{1:}',
|
||||
'{:}',
|
||||
'{:1}',
|
||||
'{!:}',
|
||||
'{!}',
|
||||
'{!a}',
|
||||
'{1:{}}',
|
||||
'{1:{:}}',
|
||||
]
|
||||
)
|
||||
def test_valid(code, grammar):
|
||||
fstring = grammar.parse(code, error_recovery=False)
|
||||
code = 'f"""%s"""' % code
|
||||
module = grammar.parse(code, error_recovery=False)
|
||||
fstring = module.children[0]
|
||||
assert fstring.type == 'fstring'
|
||||
assert fstring.get_code() == code
|
||||
|
||||
@@ -52,24 +44,46 @@ def test_valid(code, grammar):
|
||||
'{',
|
||||
'{1!{a}}',
|
||||
'{!{a}}',
|
||||
'{}',
|
||||
'{:}',
|
||||
'{:}}}',
|
||||
'{:1}',
|
||||
'{!:}',
|
||||
'{!}',
|
||||
'{!a}',
|
||||
'{1:{}}',
|
||||
'{1:{:}}',
|
||||
]
|
||||
)
|
||||
def test_invalid(code, grammar):
|
||||
code = 'f"""%s"""' % code
|
||||
with pytest.raises(ParserSyntaxError):
|
||||
grammar.parse(code, error_recovery=False)
|
||||
|
||||
# It should work with error recovery.
|
||||
#grammar.parse(code, error_recovery=True)
|
||||
grammar.parse(code, error_recovery=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
('code', 'start_pos', 'positions'), [
|
||||
('code', 'positions'), [
|
||||
# 2 times 2, 5 because python expr and endmarker.
|
||||
('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
|
||||
(' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
|
||||
('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
|
||||
('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]),
|
||||
('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9),
|
||||
(1, 10), (1, 11), (1, 12), (1, 13)]),
|
||||
('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1),
|
||||
(4, 2), (4, 5)]),
|
||||
]
|
||||
)
|
||||
def test_tokenize_start_pos(code, start_pos, positions):
|
||||
tokens = tokenize(code, start_pos)
|
||||
def test_tokenize_start_pos(code, positions):
|
||||
tokens = list(tokenize(code, version_info=(3, 6)))
|
||||
assert positions == [p.start_pos for p in tokens]
|
||||
|
||||
|
||||
def test_roundtrip(grammar):
|
||||
code = dedent("""\
|
||||
f'''s{
|
||||
str.uppe
|
||||
'''
|
||||
""")
|
||||
tree = grammar.parse(code)
|
||||
assert tree.get_code() == code
|
||||
|
||||
@@ -114,6 +114,22 @@ def _get_actual_exception(code):
|
||||
# Python 3.4/3.4 have a bit of a different warning than 3.5/3.6 in
|
||||
# certain places. But in others this error makes sense.
|
||||
return [wanted, "SyntaxError: can't use starred expression here"], line_nr
|
||||
elif wanted == 'SyntaxError: f-string: unterminated string':
|
||||
wanted = 'SyntaxError: EOL while scanning string literal'
|
||||
elif wanted == 'SyntaxError: f-string expression part cannot include a backslash':
|
||||
return [
|
||||
wanted,
|
||||
"SyntaxError: EOL while scanning string literal",
|
||||
"SyntaxError: unexpected character after line continuation character",
|
||||
], line_nr
|
||||
elif wanted == "SyntaxError: f-string: expecting '}'":
|
||||
wanted = 'SyntaxError: EOL while scanning string literal'
|
||||
elif wanted == 'SyntaxError: f-string: empty expression not allowed':
|
||||
wanted = 'SyntaxError: invalid syntax'
|
||||
elif wanted == "SyntaxError: f-string expression part cannot include '#'":
|
||||
wanted = 'SyntaxError: invalid syntax'
|
||||
elif wanted == "SyntaxError: f-string: single '}' is not allowed":
|
||||
wanted = 'SyntaxError: invalid syntax'
|
||||
return [wanted], line_nr
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,8 @@ import pytest
|
||||
from parso._compatibility import py_version
|
||||
from parso.utils import split_lines, parse_version_string
|
||||
from parso.python.token import (
|
||||
NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
|
||||
NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT,
|
||||
FSTRING_START)
|
||||
from parso.python import tokenize
|
||||
from parso import parse
|
||||
from parso.python.tokenize import PythonToken
|
||||
@@ -162,8 +163,9 @@ def test_ur_literals():
|
||||
token_list = _get_token_list(literal)
|
||||
typ, result_literal, _, _ = token_list[0]
|
||||
if is_literal:
|
||||
assert typ == STRING
|
||||
assert result_literal == literal
|
||||
if typ != FSTRING_START:
|
||||
assert typ == STRING
|
||||
assert result_literal == literal
|
||||
else:
|
||||
assert typ == NAME
|
||||
|
||||
@@ -175,6 +177,7 @@ def test_ur_literals():
|
||||
# Starting with Python 3.3 this ordering is also possible.
|
||||
if py_version >= 33:
|
||||
check('Rb""')
|
||||
|
||||
# Starting with Python 3.6 format strings where introduced.
|
||||
check('fr""', is_literal=py_version >= 36)
|
||||
check('rF""', is_literal=py_version >= 36)
|
||||
|
||||
5
tox.ini
5
tox.ini
@@ -1,14 +1,15 @@
|
||||
[tox]
|
||||
envlist = py26, py27, py33, py34, py35, py36
|
||||
envlist = py27, py33, py34, py35, py36, py37
|
||||
[testenv]
|
||||
deps =
|
||||
pytest>=3.0.7
|
||||
{env:_PARSO_TEST_PYTEST_DEP:pytest>=3.0.7}
|
||||
# For --lf and --ff.
|
||||
pytest-cache
|
||||
setenv =
|
||||
# https://github.com/tomchristie/django-rest-framework/issues/1957
|
||||
# tox corrupts __pycache__, solution from here:
|
||||
PYTHONDONTWRITEBYTECODE=1
|
||||
py26,py33: _PARSO_TEST_PYTEST_DEP=pytest>=3.0.7,<3.3
|
||||
commands =
|
||||
pytest {posargs:parso test}
|
||||
[testenv:cov]
|
||||
|
||||
Reference in New Issue
Block a user