106 Commits

Author SHA1 Message Date
Dave Halter
acccb4f28d 0.3.4 release 2019-02-13 00:19:07 +01:00
Dave Halter
3f6fc8a5ad Fix an f-string tokenizer issue 2019-02-13 00:17:37 +01:00
Dave Halter
f1ee7614c9 Release of 0.3.3 2019-02-06 09:55:18 +01:00
Dave Halter
58850f8bfa Rename a test 2019-02-06 09:51:46 +01:00
Dave Halter
d38a60278e Remove some unused code 2019-02-06 09:50:27 +01:00
Dave Halter
6c65aea47d Fix working with async functions in the diff parser, fixes #56 2019-02-06 09:31:46 +01:00
Dave Halter
0d37ff865c Fix bytes/fstring mixing when using iter_errors, fixes #57. 2019-02-06 01:28:47 +01:00
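A minimal sketch of what this fix enables (assuming parso 0.3.3+; the snippet itself is hypothetical, the API is parso's public Grammar interface): iter_errors can now inspect an implicit concatenation of a bytes literal and an f-string and report the usual mixing error instead of failing on the fstring node.

    import parso

    grammar = parso.load_grammar(version='3.6')
    # Implicit concatenation of a bytes literal and an f-string.
    module = grammar.parse("x = b'ab' f'cd'\n")
    for issue in grammar.iter_errors(module):
        # Expected issue message: "cannot mix bytes and nonbytes literals"
        print(issue.start_pos, issue.message)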
Dave Halter
076e296497 Improve a docstring, fixes #55. 2019-01-26 21:34:56 +01:00
Dave Halter
a2b153e3c1 Upgrade the Changelog 2019-01-24 00:42:53 +01:00
Dave Halter
bb2855897b Escape a backslash properly 2019-01-24 00:31:26 +01:00
Dave Halter
9c9e6ffede Bump the parso version to 0.3.2 2019-01-24 00:22:30 +01:00
Dave Halter
b5d8175eaa f-string parts are also PythonLeaf instances 2019-01-24 00:15:53 +01:00
Dave Halter
32a83b932a Fix get_start_pos_of_prefix 2019-01-24 00:00:06 +01:00
Dave Halter
01ae01a382 Remove dead code 2019-01-23 23:28:18 +01:00
Dave Halter
5fbc207892 Refactor f-string support 2019-01-23 10:58:36 +01:00
Dave Halter
60e4591837 Fix: End detection for strings was mostly wrong, fixes #51 2019-01-23 10:13:25 +01:00
Dave Halter
ef56debb78 Fix f-string escapes, fixes #48
The tokenizer was not detecting backslash escapes for f-string endings properly
2019-01-22 22:20:32 +01:00
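A rough illustration of the fixed behaviour (the snippet is an assumption, not taken from the commit): a backslash-escaped quote inside the literal part of an f-string must not be taken as the end of the string, so the parsed tree should round-trip the source unchanged.

    import parso

    code = 'x = f"a \\" b"\n'          # the escaped quote is part of the f-string
    module = parso.parse(code, version='3.6')
    print(module.get_code() == code)   # expected: True (no premature FSTRING_END)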
Dave Halter
dc2582f488 Tokenizer: Simplify end of string regexes 2019-01-22 09:33:57 +01:00
Dave Halter
fe69989fbc Add a comment from the Python3.7 code base 2019-01-21 00:33:02 +01:00
Dave Halter
ce8b531175 Fix diff parser: The previous fix was a bit off 2019-01-20 19:03:45 +01:00
Dave Halter
069c08883a Change fuzzer: Add ways to not always use correct parse input 2019-01-20 18:18:13 +01:00
Dave Halter
0da0a8655a Fix diff parser: issue with opening brackets 2019-01-20 00:41:11 +01:00
Dave Halter
3d890c3a00 Async doesn't work in 3.4 2019-01-19 12:59:35 +01:00
Dave Halter
956ea55048 Skip some tests for Python2.6 and Python3.3 2019-01-19 12:08:39 +01:00
Dave Halter
0bd17bee2c Fix diff parser: DEDENT as error leaves should also be ignored and reparsed 2019-01-18 18:41:08 +01:00
Dave Halter
f3015efb2d Fix diff parser: error dedents in between nodes should be ignored for now when copying 2019-01-18 02:43:12 +01:00
Dave Halter
197391dc53 Fix diff parser: Don't copy error nodes/leaves in the beginning, leads to strange issues in some cases 2019-01-17 23:48:00 +01:00
Dave Halter
32321a74b1 Diff fuzzer: Create a check to see if the errors make sense. 2019-01-17 22:07:59 +01:00
Dave Halter
52d01685ba Fix diff parser: Don't copy DEDENT tokens at the beginning 2019-01-17 21:31:13 +01:00
Dave Halter
e591b929eb Fix diff parser: Skip last leaves for last line offset leaves 2019-01-17 00:15:38 +01:00
Dave Halter
dac4c445a7 Fix indentation error tokens 2019-01-16 23:21:31 +01:00
Dave Halter
20fd32b45d Fix diff parser: Avoid side effects for prefix 2019-01-14 21:37:19 +01:00
Dave Halter
9cc8178998 Fix tokenizer: backslashes sometimes led to newline token generation 2019-01-14 09:59:16 +01:00
Dave Halter
1e25445176 Make lines easier copyable in the fuzzer 2019-01-14 01:50:39 +01:00
Dave Halter
d7171ae927 Fix tokenizer: Carriage returns after backslashes were not properly handled 2019-01-14 01:49:09 +01:00
Dave Halter
d3d28480ed Fix in diff parser: prefix calculation was wrong when copying nodes 2019-01-14 01:00:17 +01:00
Dave Halter
564be7882e Replace --print-diff with --print-code 2019-01-14 00:20:49 +01:00
Dave Halter
76c5754b76 Fix diff parser generation for empty files 2019-01-13 23:38:35 +01:00
Dave Halter
55247a5a2c Docopt should not be needed for tests 2019-01-13 23:24:17 +01:00
Dave Halter
7ae1efe5c7 Fix tokenizer: Form feeds and multiline docstrings didn't work together 2019-01-13 23:16:09 +01:00
Dave Halter
01dba7f8ce Fix diff parser: Need to calculate the prefix for the diff tokenizer better 2019-01-13 22:38:53 +01:00
Dave Halter
ea8a758051 Remove copied nodes stuff, to simplify some things 2019-01-13 19:57:23 +01:00
Dave Halter
a7e24a37e7 Fix newline endings and a few parser/copy counts 2019-01-13 19:55:18 +01:00
Dave Halter
f80d9de7a0 Feature: The diff parser fuzzer is now able to use random Python fragments
This hopefully leads to the fuzzer finding more and faster issues in the diff
parser.
2019-01-13 16:00:36 +01:00
Dave Halter
eaee2b9ca0 Fix: The Python 3.8 grammar did not include f-string support 2019-01-13 15:51:46 +01:00
Dave Halter
dd1761da96 Fix tokenizer: Closing parentheses in the wrong place should not lead to strange behavior 2019-01-13 14:52:33 +01:00
Dave Halter
e10802ab09 Fix end positions with error dedents 2019-01-13 14:14:16 +01:00
Dave Halter
3d402d0a77 Fix diff parser tests for Python 2 2019-01-10 09:26:42 +01:00
Dave Halter
f6a8b997f2 Randomize the fuzzer a bit more with inserting characters 2019-01-10 01:22:24 +01:00
Dave Halter
94c2681c8e Simplify the regexes 2019-01-10 01:21:56 +01:00
Dave Halter
610a820799 Fix a regex clause that was totally wrong 2019-01-10 01:00:08 +01:00
Dave Halter
57320af6eb Fix another tokenizer issue 2019-01-09 00:55:54 +01:00
Dave Halter
574e1c63e8 Apply \r changes in syntax trees 2019-01-09 00:34:19 +01:00
Dave Halter
fbaad7883f Actually make \r usable 2019-01-08 20:03:08 +01:00
Dave Halter
b1f613fe16 Fix split lines for Python code
Some characters like Vertical Tab or File Separator were used as line separators.
This is not legal. Line Separators in Python are only Carriage Return \r and Line Feed \n.
2019-01-08 08:42:30 +01:00
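A small sketch of the difference described above (outputs are the expected ones, not verified against this exact revision): parso.utils.split_lines only splits on \n, \r and \r\n, whereas str.splitlines also splits on characters such as vertical tab and form feed.

    from parso.utils import split_lines

    print(split_lines('a\vb\nc'))                    # expected: ['a\vb', 'c']
    print('a\vb\nc'.splitlines())                    # ['a', 'b', 'c']
    print(split_lines('a\r\nb\rc', keepends=True))   # expected: ['a\r\n', 'b\r', 'c']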
Dave Halter
f4696a6245 Add \r as a valid linebreak for splitlines 2019-01-07 18:46:16 +01:00
Dave Halter
48c1a0e590 Move split_lines tests around 2019-01-07 18:40:34 +01:00
Dave Halter
6f63147f69 Start generating really random strings with the fuzzer 2019-01-06 20:51:49 +01:00
Dave Halter
94bd48bae1 Fix tokenizer: Dedents before error tokens are properly done, now. 2019-01-06 19:26:49 +01:00
Dave Halter
edbceba4f8 Fix diff parser: Also check async with 2019-01-06 16:25:28 +01:00
Dave Halter
b33c2b3ae1 Make the diff parser use a lot of different files by default 2019-01-06 15:43:37 +01:00
Dave Halter
65a0748f4f Fix diff parser: Forgot that with statements are also flows 2019-01-06 15:41:16 +01:00
Dave Halter
c442cf98be Fix valid graph asserting for some dedents that are errors 2019-01-06 12:39:04 +01:00
Dave Halter
65b15b05e3 Fix diff parser: If funcs are not copied, errors shouldn't either 2019-01-06 11:39:51 +01:00
Dave Halter
26aee1c6a9 Better documentation for the fuzz diff parser script 2019-01-06 01:10:15 +01:00
Dave Halter
c88a862bae Rename a test 2019-01-06 01:08:15 +01:00
Dave Halter
d6b0585933 More verbose output for the diff fuzzer 2019-01-06 01:05:07 +01:00
Dave Halter
6eba40b4c5 Fix diff parser: error dedent issues 2019-01-06 01:00:34 +01:00
Dave Halter
428bde0573 Fix diff parser: Avoid indentation issues 2019-01-05 22:40:31 +01:00
Dave Halter
d1d866f6c6 Use the right diff order in debug output 2019-01-05 18:36:48 +01:00
Dave Halter
a8ec75fedd Fix diff parser: The prefix was wrong in some copy cases 2019-01-05 18:33:38 +01:00
Dave Halter
deaf1f310b Make fuzz parser compatible with Python 2 2019-01-05 14:57:58 +01:00
Dave Halter
2a881bf875 Make it possible to print all diffs in fuzzer 2019-01-05 14:50:59 +01:00
Dave Halter
4d713f56e9 Introduce a redo flag 'only_last' to narrow down issues 2019-01-05 14:20:30 +01:00
Dave Halter
d202fdea49 Add docopt to testing dependencies 2019-01-05 14:09:14 +01:00
Dave Halter
5e6d5dec59 Rewrite the fuzz diff parser to cache errors (so we can re-run those) 2019-01-05 14:05:19 +01:00
Dave Halter
c1846dd082 Fix diff parser: Decorators were sometimes parsed without their functions 2019-01-05 09:29:00 +01:00
Dave Halter
5da51720cd Fix tokenizer: Dedents should only happen after newlines 2019-01-03 11:44:17 +01:00
Dave Halter
fde64d0eae Usability for diff parser fuzzing 2019-01-02 17:31:07 +01:00
Dave Halter
430f13af5e Fix for diff parser: Rewrite prefix logic and don't mutate prematurely 2019-01-02 17:28:01 +01:00
Dave Halter
96ae6a078b Fix diff parser: positioning of functions if decorators were removed 2019-01-02 13:16:22 +01:00
Dave Halter
a9f58b7c45 Ignore ERROR_DEDENT in graph validation 2019-01-02 12:15:05 +01:00
Dave Halter
e0d0e57bd0 Add a small diff parser fuzzer
It should help us find the rest of the issues that the diff parser has
2019-01-02 11:26:31 +01:00
Dave Halter
d2542983e9 Fix diff parser: get_last_line was sometimes wrong
Now the calculation is way simpler. Still annoying that it even happened.
2019-01-02 01:39:53 +01:00
Dave Halter
64cf24d9da Fix error reporting order for diff issues 2019-01-02 00:33:43 +01:00
Dave Halter
02f48a68f2 Clean up the test diff parser file 2019-01-01 23:37:31 +01:00
Dave Halter
c7c464e5e9 Avoid nasty side effects in creation of Node
This issue led to bugs in Jedi, because Jedi used the nodes in a wrong way.
2019-01-01 23:35:20 +01:00
Dave Halter
29325d3052 Make parso errors even more informative 2018-12-31 11:47:02 +01:00
Dave Halter
750b8af37b Fix diff parser get_last_line calculation 2018-12-31 01:25:11 +01:00
Dave Halter
0126a38bd1 Fix graph asserting for error indents 2018-12-30 18:20:55 +01:00
Dave Halter
c2985c111e Better checks for checking valid graphs 2018-12-30 16:34:11 +01:00
Dave Halter
45f9d4b204 Create better ways for debugging the diff parser 2018-12-30 16:03:54 +01:00
Dave Halter
f99fe6ad21 Fix diff-parser: Copying parts of if else should not lead to the whole thing being copied 2018-12-30 15:25:17 +01:00
Dave Halter
a64c32bb2a Reenable diff parser parser counting in all tests 2018-12-30 02:46:44 +01:00
Dave Halter
e5fb1927bb Fix: Make the NodesStack to a NodesTree
This fixes an issue with positions that were doubled if the stack was closed too early.
2018-12-30 01:27:37 +01:00
Dave Halter
0ef4809377 Fix for diff parser : Make sure that start_pos are growing always
The problem was that functions/classes were sometimes not well positioned. Now
all diff tests are ensuring that leaves always grow.
2018-12-28 21:49:49 +01:00
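A rough sketch of the invariant described above, using parso's public tree API (the sample code is hypothetical): walking all leaves of a parsed module, start positions never go backwards.

    import parso

    module = parso.parse("def f():\n    pass\n\nclass C:\n    x = 1\n")
    leaf = module.get_first_leaf()
    previous_end = (1, 0)
    while leaf is not None:
        assert leaf.start_pos >= previous_end, (leaf, previous_end)
        previous_end = leaf.end_pos
        leaf = leaf.get_next_leaf()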
Dave Halter
29456a6c0a Add a check to see if leaves have the right start positions 2018-12-28 02:24:22 +01:00
Dave Halter
ada84ed063 Add parso version to an exception 2018-12-27 13:33:10 +01:00
Thomas A Caswell
1c7b078db0 MNT: add grammar for python 3.8
Copied from cpython commit 1dd035954bb03c41b954ebbd63969b4bcb0e106e
2018-12-22 12:39:08 +01:00
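A hedged usage sketch (the version string and snippet are assumptions, not part of this commit): once grammar38.txt ships, the 3.8 grammar loads like any other and, after the later f-string grammar fix, parses f-strings without reported issues.

    import parso

    grammar = parso.load_grammar(version='3.8')
    module = grammar.parse('async def f():\n    return f"answer: {40 + 2}"\n')
    print(len(list(grammar.iter_errors(module))))  # expected: 0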
Hugo
930ec08ab0 Use SVG badges
And update some links to HTTPS.
2018-09-28 18:51:36 +02:00
Daniel Hahler
a90622040d tox.ini: simplify deps 2018-09-22 10:02:38 +02:00
Daniel Hahler
98c02f7d79 tox: add pypy to envlist for tox-travis
Fixes deprecation warning
(https://travis-ci.org/davidhalter/parso/jobs/431468986).
2018-09-22 10:02:38 +02:00
Daniel Hahler
d6d6c5038f setup.py: add "testing" extras_require
Ref: https://github.com/davidhalter/parso/issues/15#issuecomment-339964845
2018-09-22 10:02:38 +02:00
Michael Käufl
3be8ac7786 Add Python 3.7 stable to test matrix and update classifiers 2018-09-13 00:28:27 +02:00
Anders Hovmöller
96f1582b6e Update usage.rst 2018-08-02 22:17:14 +02:00
Dave Halter
7064ecf3fb Don't use invalid escape sequences in regex, see https://github.com/davidhalter/jedi-vim/issues/843 2018-07-12 08:53:48 +02:00
29 changed files with 1917 additions and 400 deletions

.gitignore
View File

@@ -10,3 +10,4 @@
parso.egg-info/
/.cache/
/.pytest_cache
test/fuzz-redo.pickle

View File

@@ -6,14 +6,14 @@ python:
- 3.4
- 3.5
- 3.6
- 3.7-dev
- pypy
matrix:
allow_failures:
- env: TOXENV=cov
include:
- { python: "3.7", dist: xenial, sudo: true }
- python: 3.5
env: TOXENV=cov
allow_failures:
- env: TOXENV=cov
install:
- pip install --quiet tox-travis
script:

View File

@@ -3,6 +3,26 @@
Changelog
---------
0.3.4 (2019-02-13)
+++++++++++++++++++
- Fix an f-string tokenizer error
0.3.3 (2019-02-06)
+++++++++++++++++++
- Fix async errors in the diff parser
- A fix in iter_errors
- This is a very small bugfix release
0.3.2 (2019-01-24)
+++++++++++++++++++
- 20+ bugfixes in the diff parser and 3 in the tokenizer
- A fuzzer for the diff parser, to give confidence that the diff parser is in a
good shape.
- Some bugfixes for f-string
0.3.1 (2018-07-09)
+++++++++++++++++++

View File

@@ -2,12 +2,13 @@
parso - A Python Parser
###################################################################
.. image:: https://secure.travis-ci.org/davidhalter/parso.png?branch=master
:target: http://travis-ci.org/davidhalter/parso
:alt: Travis-CI build status
.. image:: https://coveralls.io/repos/davidhalter/parso/badge.png?branch=master
:target: https://coveralls.io/r/davidhalter/parso
.. image:: https://travis-ci.org/davidhalter/parso.svg?branch=master
:target: https://travis-ci.org/davidhalter/parso
:alt: Travis CI build status
.. image:: https://coveralls.io/repos/github/davidhalter/parso/badge.svg?branch=master
:target: https://coveralls.io/github/davidhalter/parso?branch=master
:alt: Coverage Status
.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png
@@ -55,10 +56,10 @@ To list multiple issues:
Resources
=========
- `Testing <http://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
- `Testing <https://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
- `PyPI <https://pypi.python.org/pypi/parso>`_
- `Docs <https://parso.readthedocs.org/en/latest/>`_
- Uses `semantic versioning <http://semver.org/>`_
- Uses `semantic versioning <https://semver.org/>`_
Installation
============

View File

@@ -57,6 +57,8 @@ def pytest_generate_tests(metafunc):
metafunc.parametrize('each_py2_version', VERSIONS_2)
elif 'each_py3_version' in metafunc.fixturenames:
metafunc.parametrize('each_py3_version', VERSIONS_3)
elif 'version_ge_py36' in metafunc.fixturenames:
metafunc.parametrize('version_ge_py36', ['3.6', '3.7'])
class NormalizerIssueCase(object):
@@ -151,8 +153,5 @@ def works_ge_py3(each_version):
@pytest.fixture
def works_ge_py35(each_version):
"""
Works only greater equal Python 3.3.
"""
version_info = parse_version_string(each_version)
return Checker(each_version, version_info >= (3, 5))

View File

@@ -61,6 +61,8 @@ Used By
-------
- jedi_ (which is used by IPython and a lot of editor plugins).
- mutmut_ (mutation tester)
.. _jedi: https://github.com/davidhalter/jedi
.. _mutmut: https://github.com/boxed/mutmut

View File

@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
from parso.utils import split_lines, python_bytes_to_unicode
__version__ = '0.3.1'
__version__ = '0.3.4'
def parse(code=None, **kwargs):

View File

@@ -186,7 +186,7 @@ class Grammar(object):
return normalizer.issues
def __repr__(self):
nonterminals = self._pgen_grammar._nonterminal_to_dfas.keys()
nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
txt = ' '.join(list(nonterminals)[:3]) + ' ...'
return '<%s:%s>' % (self.__class__.__name__, txt)

View File

@@ -152,9 +152,12 @@ class BaseParser(object):
def convert_node(self, nonterminal, children):
try:
return self.node_map[nonterminal](children)
node = self.node_map[nonterminal](children)
except KeyError:
return self.default_node(nonterminal, children)
node = self.default_node(nonterminal, children)
for c in children:
c.parent = node
return node
def convert_leaf(self, type_, value, prefix, start_pos):
try:

View File

@@ -17,6 +17,77 @@ from parso.python.tokenize import PythonToken
from parso.python.token import PythonTokenTypes
LOG = logging.getLogger(__name__)
DEBUG_DIFF_PARSER = False
_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'
def _get_previous_leaf_if_indentation(leaf):
while leaf and leaf.type == 'error_leaf' \
and leaf.token_type in _INDENTATION_TOKENS:
leaf = leaf.get_previous_leaf()
return leaf
def _get_next_leaf_if_indentation(leaf):
while leaf and leaf.type == 'error_leaf' \
and leaf.token_type in _INDENTATION_TOKENS:
leaf = leaf.get_previous_leaf()
return leaf
def _assert_valid_graph(node):
"""
Checks if the parent/children relationship is correct.
This is a check that only runs during debugging/testing.
"""
try:
children = node.children
except AttributeError:
# Ignore INDENT is necessary, because indent/dedent tokens don't
# contain value/prefix and are just around, because of the tokenizer.
if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
assert not node.value
assert not node.prefix
return
# Calculate the content between two start positions.
previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
if previous_leaf is None:
content = node.prefix
previous_start_pos = 1, 0
else:
assert previous_leaf.end_pos <= node.start_pos, \
(previous_leaf, node)
content = previous_leaf.value + node.prefix
previous_start_pos = previous_leaf.start_pos
if '\n' in content or '\r' in content:
splitted = split_lines(content)
line = previous_start_pos[0] + len(splitted) - 1
actual = line, len(splitted[-1])
else:
actual = previous_start_pos[0], previous_start_pos[1] + len(content)
assert node.start_pos == actual, (node.start_pos, actual)
else:
for child in children:
assert child.parent == node, (node, child)
_assert_valid_graph(child)
def _get_debug_error_message(module, old_lines, new_lines):
current_lines = split_lines(module.get_code(), keepends=True)
current_diff = difflib.unified_diff(new_lines, current_lines)
old_new_diff = difflib.unified_diff(old_lines, new_lines)
import parso
return (
"There's an issue with the diff parser. Please "
"report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
% (parso.__version__, ''.join(old_new_diff), ''.join(current_diff))
)
def _get_last_line(node_or_leaf):
@@ -27,13 +98,21 @@ def _get_last_line(node_or_leaf):
return last_leaf.end_pos[0]
def _skip_dedent_error_leaves(leaf):
while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT':
leaf = leaf.get_previous_leaf()
return leaf
def _ends_with_newline(leaf, suffix=''):
leaf = _skip_dedent_error_leaves(leaf)
if leaf.type == 'error_leaf':
typ = leaf.token_type.lower()
else:
typ = leaf.type
return typ == 'newline' or suffix.endswith('\n')
return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
def _flows_finished(pgen_grammar, stack):
@@ -55,11 +134,15 @@ def _func_or_class_has_suite(node):
return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite'
def suite_or_file_input_is_valid(pgen_grammar, stack):
def _suite_or_file_input_is_valid(pgen_grammar, stack):
if not _flows_finished(pgen_grammar, stack):
return False
for stack_node in reversed(stack):
if stack_node.nonterminal == 'decorator':
# A decorator is only valid with the upcoming function.
return False
if stack_node.nonterminal == 'suite':
# If only newline is in the suite, the suite is not valid, yet.
return len(stack_node.nodes) > 1
@@ -69,11 +152,13 @@ def suite_or_file_input_is_valid(pgen_grammar, stack):
def _is_flow_node(node):
if node.type == 'async_stmt':
node = node.children[1]
try:
value = node.children[0].value
except AttributeError:
return False
return value in ('if', 'for', 'while', 'try')
return value in ('if', 'for', 'while', 'try', 'with')
class _PositionUpdatingFinished(Exception):
@@ -107,7 +192,7 @@ class DiffParser(object):
self._copy_count = 0
self._parser_count = 0
self._nodes_stack = _NodesStack(self._module)
self._nodes_tree = _NodesTree(self._module)
def update(self, old_lines, new_lines):
'''
@@ -136,11 +221,10 @@ class DiffParser(object):
line_length = len(new_lines)
sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
opcodes = sm.get_opcodes()
LOG.debug('diff parser calculated')
LOG.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))
for operation, i1, i2, j1, j2 in opcodes:
LOG.debug('diff code[%s] old[%s:%s] new[%s:%s]',
LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
operation, i1 + 1, i2, j1 + 1, j2)
if j2 == line_length and new_lines[-1] == '':
@@ -159,17 +243,24 @@ class DiffParser(object):
# With this action all change will finally be applied and we have a
# changed module.
self._nodes_stack.close()
self._nodes_tree.close()
if DEBUG_DIFF_PARSER:
# If there is reasonable suspicion that the diff parser is not
# behaving well, this should be enabled.
try:
assert self._module.get_code() == ''.join(new_lines)
_assert_valid_graph(self._module)
except AssertionError:
print(_get_debug_error_message(self._module, old_lines, new_lines))
raise
last_pos = self._module.end_pos[0]
if last_pos != line_length:
current_lines = split_lines(self._module.get_code(), keepends=True)
diff = difflib.unified_diff(current_lines, new_lines)
raise Exception(
"There's an issue (%s != %s) with the diff parser. Please report:\n%s"
% (last_pos, line_length, ''.join(diff))
('(%s != %s) ' % (last_pos, line_length))
+ _get_debug_error_message(self._module, old_lines, new_lines)
)
LOG.debug('diff parser end')
return self._module
@@ -178,28 +269,21 @@ class DiffParser(object):
LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
copied_nodes = [None]
last_until_line = -1
while until_line_new > self._nodes_stack.parsed_until_line:
parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
while until_line_new > self._nodes_tree.parsed_until_line:
parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
if line_stmt is None:
# Parse 1 line at least. We don't need more, because we just
# want to get into a state where the old parser has statements
# again that can be copied (e.g. not lines within parentheses).
self._parse(self._nodes_stack.parsed_until_line + 1)
elif not copied_nodes:
# We have copied as much as possible (but definitely not too
# much). Therefore we just parse the rest.
# We might not reach the end, because there's a statement
# that is not finished.
self._parse(until_line_new)
self._parse(self._nodes_tree.parsed_until_line + 1)
else:
p_children = line_stmt.parent.children
index = p_children.index(line_stmt)
copied_nodes = self._nodes_stack.copy_nodes(
from_ = self._nodes_tree.parsed_until_line + 1
copied_nodes = self._nodes_tree.copy_nodes(
p_children[index:],
until_line_old,
line_offset
@@ -208,15 +292,19 @@ class DiffParser(object):
if copied_nodes:
self._copy_count += 1
from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
to = self._nodes_stack.parsed_until_line
to = self._nodes_tree.parsed_until_line
LOG.debug('diff actually copy %s to %s', from_, to)
LOG.debug('copy old[%s:%s] new[%s:%s]',
copied_nodes[0].start_pos[0],
copied_nodes[-1].end_pos[0] - 1, from_, to)
else:
# We have copied as much as possible (but definitely not too
# much). Therefore we just parse a bit more.
self._parse(self._nodes_tree.parsed_until_line + 1)
# Since there are potential bugs that might loop here endlessly, we
# just stop here.
assert last_until_line != self._nodes_stack.parsed_until_line \
or not copied_nodes, last_until_line
last_until_line = self._nodes_stack.parsed_until_line
assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
last_until_line = self._nodes_tree.parsed_until_line
def _get_old_line_stmt(self, old_line):
leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
@@ -227,46 +315,36 @@ class DiffParser(object):
node = leaf
while node.parent.type not in ('file_input', 'suite'):
node = node.parent
return node
# Make sure that if only the `else:` line of an if statement is
# copied that not the whole thing is going to be copied.
if node.start_pos[0] >= old_line:
return node
# Must be on the same line. Otherwise we need to parse that bit.
return None
def _get_before_insertion_node(self):
if self._nodes_stack.is_empty():
return None
line = self._nodes_stack.parsed_until_line + 1
node = self._new_module.get_last_leaf()
while True:
parent = node.parent
if parent.type in ('suite', 'file_input'):
assert node.end_pos[0] <= line
assert node.end_pos[1] == 0 or '\n' in self._prefix
return node
node = parent
def _parse(self, until_line):
"""
Parses at least until the given line, but might just parse more until a
valid state is reached.
"""
last_until_line = 0
while until_line > self._nodes_stack.parsed_until_line:
while until_line > self._nodes_tree.parsed_until_line:
node = self._try_parse_part(until_line)
nodes = node.children
self._nodes_stack.add_parsed_nodes(nodes)
self._nodes_tree.add_parsed_nodes(nodes)
LOG.debug(
'parse_part from %s to %s (to %s in part parser)',
nodes[0].get_start_pos_of_prefix()[0],
self._nodes_stack.parsed_until_line,
self._nodes_tree.parsed_until_line,
node.end_pos[0] - 1
)
# Since the tokenizer sometimes has bugs, we cannot be sure that
# this loop terminates. Therefore assert that there's always a
# change.
assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line
last_until_line = self._nodes_stack.parsed_until_line
assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
last_until_line = self._nodes_tree.parsed_until_line
def _try_parse_part(self, until_line):
"""
@@ -277,7 +355,7 @@ class DiffParser(object):
self._parser_count += 1
# TODO speed up, shouldn't copy the whole list all the time.
# memoryview?
parsed_until_line = self._nodes_stack.parsed_until_line
parsed_until_line = self._nodes_tree.parsed_until_line
lines_after = self._parser_lines_new[parsed_until_line:]
tokens = self._diff_tokenize(
lines_after,
@@ -319,10 +397,12 @@ class DiffParser(object):
# We are done here, only thing that can come now is an
# endmarker or another dedented code block.
typ, string, start_pos, prefix = next(tokens)
if '\n' in prefix:
prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
if '\n' in prefix or '\r' in prefix:
prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
else:
prefix = ''
assert start_pos[1] >= len(prefix), repr(prefix)
if start_pos[1] - len(prefix) == 0:
prefix = ''
yield PythonToken(
PythonTokenTypes.ENDMARKER, '',
(start_pos[0] + line_offset, 0),
@@ -332,7 +412,7 @@ class DiffParser(object):
elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
yield PythonToken(typ, string, start_pos, prefix)
# Check if the parser is actually in a valid suite state.
if suite_or_file_input_is_valid(self._pgen_grammar, stack):
if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
start_pos = start_pos[0] + 1, 0
while len(indents) > int(omitted_first_indent):
indents.pop()
@@ -346,17 +426,23 @@ class DiffParser(object):
yield PythonToken(typ, string, start_pos, prefix)
class _NodesStackNode(object):
ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf')
class _NodesTreeNode(object):
_ChildrenGroup = namedtuple('_ChildrenGroup', 'prefix children line_offset last_line_offset_leaf')
def __init__(self, tree_node, parent=None):
self.tree_node = tree_node
self.children_groups = []
self._children_groups = []
self.parent = parent
self._node_children = []
def close(self):
def finish(self):
children = []
for children_part, line_offset, last_line_offset_leaf in self.children_groups:
for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
first_leaf = _get_next_leaf_if_indentation(
children_part[0].get_first_leaf()
)
first_leaf.prefix = prefix + first_leaf.prefix
if line_offset != 0:
try:
_update_positions(
@@ -369,59 +455,61 @@ class _NodesStackNode(object):
for node in children:
node.parent = self.tree_node
def add(self, children, line_offset=0, last_line_offset_leaf=None):
group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf)
self.children_groups.append(group)
for node_child in self._node_children:
node_child.finish()
def add_child_node(self, child_node):
self._node_children.append(child_node)
def add_tree_nodes(self, prefix, children, line_offset=0, last_line_offset_leaf=None):
if last_line_offset_leaf is None:
last_line_offset_leaf = children[-1].get_last_leaf()
group = self._ChildrenGroup(prefix, children, line_offset, last_line_offset_leaf)
self._children_groups.append(group)
def get_last_line(self, suffix):
line = 0
if self.children_groups:
children_group = self.children_groups[-1]
last_leaf = children_group.children[-1].get_last_leaf()
line = last_leaf.end_pos[0]
if self._children_groups:
children_group = self._children_groups[-1]
last_leaf = _get_previous_leaf_if_indentation(
children_group.last_line_offset_leaf
)
# Calculate the line offsets
offset = children_group.line_offset
if offset:
# In case the line_offset is not applied to this specific leaf,
# just ignore it.
if last_leaf.line <= children_group.last_line_offset_leaf.line:
line += children_group.line_offset
line = last_leaf.end_pos[0] + children_group.line_offset
# Newlines end on the next line, which means that they would cover
# the next line. That line is not fully parsed at this point.
if _ends_with_newline(last_leaf, suffix):
line -= 1
line += suffix.count('\n')
if suffix and not suffix.endswith('\n'):
line += len(split_lines(suffix)) - 1
if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
# This is the end of a file (that doesn't end with a newline).
line += 1
if self._node_children:
return max(line, self._node_children[-1].get_last_line(suffix))
return line
class _NodesStack(object):
endmarker_type = 'endmarker'
class _NodesTree(object):
def __init__(self, module):
# Top of stack
self._tos = self._base_node = _NodesStackNode(module)
self._base_node = _NodesTreeNode(module)
self._working_stack = [self._base_node]
self._module = module
self._last_prefix = ''
self._prefix_remainder = ''
self.prefix = ''
def is_empty(self):
return not self._base_node.children
@property
def parsed_until_line(self):
return self._tos.get_last_line(self.prefix)
return self._working_stack[-1].get_last_line(self.prefix)
def _get_insertion_node(self, indentation_node):
indentation = indentation_node.start_pos[1]
# find insertion node
node = self._tos
while True:
node = self._working_stack[-1]
tree_node = node.tree_node
if tree_node.type == 'suite':
# A suite starts with NEWLINE, ...
@@ -436,46 +524,51 @@ class _NodesStack(object):
elif tree_node.type == 'file_input':
return node
node = self._close_tos()
def _close_tos(self):
self._tos.close()
self._tos = self._tos.parent
return self._tos
self._working_stack.pop()
def add_parsed_nodes(self, tree_nodes):
old_prefix = self.prefix
tree_nodes = self._remove_endmarker(tree_nodes)
if not tree_nodes:
self.prefix = old_prefix + self.prefix
return
assert tree_nodes[0].type != 'newline'
node = self._get_insertion_node(tree_nodes[0])
assert node.tree_node.type in ('suite', 'file_input')
node.add(tree_nodes)
node.add_tree_nodes(old_prefix, tree_nodes)
# tos = Top of stack
self._update_tos(tree_nodes[-1])
def _update_tos(self, tree_node):
if tree_node.type in ('suite', 'file_input'):
new_tos = _NodesTreeNode(tree_node)
new_tos.add_tree_nodes('', list(tree_node.children))
self._working_stack[-1].add_child_node(new_tos)
self._working_stack.append(new_tos)
self._update_tos(tree_node.children[-1])
elif _func_or_class_has_suite(tree_node):
self._update_tos(tree_node.children[-1])
def _remove_endmarker(self, tree_nodes):
"""
Helps cleaning up the tree nodes that get inserted.
"""
last_leaf = tree_nodes[-1].get_last_leaf()
is_endmarker = last_leaf.type == self.endmarker_type
self._last_prefix = ''
is_endmarker = last_leaf.type == 'endmarker'
self._prefix_remainder = ''
if is_endmarker:
try:
separation = last_leaf.prefix.rindex('\n') + 1
except ValueError:
pass
else:
separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r'))
if separation > -1:
# Remove the whitespace part of the prefix after a newline.
# That is not relevant if parentheses were opened. Always parse
# until the end of a line.
last_leaf.prefix, self._last_prefix = \
last_leaf.prefix[:separation], last_leaf.prefix[separation:]
last_leaf.prefix, self._prefix_remainder = \
last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
first_leaf = tree_nodes[0].get_first_leaf()
first_leaf.prefix = self.prefix + first_leaf.prefix
self.prefix = ''
if is_endmarker:
@@ -490,30 +583,35 @@ class _NodesStack(object):
Returns the number of tree nodes that were copied.
"""
tos = self._get_insertion_node(tree_nodes[0])
if tree_nodes[0].type in ('error_leaf', 'error_node'):
# Avoid copying errors in the beginning. Can lead to a lot of
# issues.
return []
new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset)
self._get_insertion_node(tree_nodes[0])
new_nodes, self._working_stack, self.prefix = self._copy_nodes(
list(self._working_stack),
tree_nodes,
until_line,
line_offset,
self.prefix,
)
return new_nodes
def _copy_nodes(self, tos, nodes, until_line, line_offset):
def _copy_nodes(self, working_stack, nodes, until_line, line_offset, prefix=''):
new_nodes = []
new_tos = tos
new_prefix = ''
for node in nodes:
if node.start_pos[0] > until_line:
break
if node.type == 'endmarker':
# We basically removed the endmarker, but we are not allowed to
# remove the newline at the end of the line, otherwise it's
# going to be missing.
try:
self.prefix = node.prefix[:node.prefix.rindex('\n') + 1]
except ValueError:
pass
# Endmarkers just distort all the checks below. Remove them.
break
if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
break
# TODO this check might take a bit of time for large files. We
# might want to change this to do more intelligent guessing or
# binary search.
@@ -526,73 +624,87 @@ class _NodesStack(object):
new_nodes.append(node)
if not new_nodes:
return [], tos
return [], working_stack, prefix
tos = working_stack[-1]
last_node = new_nodes[-1]
line_offset_index = -1
had_valid_suite_last = False
if _func_or_class_has_suite(last_node):
suite = last_node
while suite.type != 'suite':
suite = suite.children[-1]
suite_tos = _NodesStackNode(suite)
suite_tos = _NodesTreeNode(suite)
# Don't need to pass line_offset here, it's already done by the
# parent.
suite_nodes, recursive_tos = self._copy_nodes(
suite_tos, suite.children, until_line, line_offset)
suite_nodes, new_working_stack, new_prefix = self._copy_nodes(
working_stack + [suite_tos], suite.children, until_line, line_offset
)
if len(suite_nodes) < 2:
# A suite only with newline is not valid.
new_nodes.pop()
new_prefix = ''
else:
suite_tos.parent = tos
new_tos = recursive_tos
line_offset_index = -2
elif (last_node.type in ('error_leaf', 'error_node') or
_is_flow_node(new_nodes[-1])):
# Error leafs/nodes don't have a defined start/end. Error
# nodes might not end with a newline (e.g. if there's an
# open `(`). Therefore ignore all of them unless they are
# succeeded with valid parser state.
# If we copy flows at the end, they might be continued
# after the copy limit (in the new parser).
# In this while loop we try to remove until we find a newline.
new_nodes.pop()
while new_nodes:
last_node = new_nodes[-1]
if last_node.get_last_leaf().type == 'newline':
break
new_nodes.pop()
assert new_nodes
tos.add_child_node(suite_tos)
working_stack = new_working_stack
had_valid_suite_last = True
if new_nodes:
try:
last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf()
except IndexError:
line_offset = 0
# In this case we don't have to calculate an offset, because
# there's no children to be managed.
last_line_offset_leaf = None
tos.add(new_nodes, line_offset, last_line_offset_leaf)
return new_nodes, new_tos
last_node = new_nodes[-1]
if (last_node.type in ('error_leaf', 'error_node') or
_is_flow_node(new_nodes[-1])):
# Error leafs/nodes don't have a defined start/end. Error
# nodes might not end with a newline (e.g. if there's an
# open `(`). Therefore ignore all of them unless they are
# succeeded with valid parser state.
# If we copy flows at the end, they might be continued
# after the copy limit (in the new parser).
# In this while loop we try to remove until we find a newline.
new_prefix = ''
new_nodes.pop()
while new_nodes:
last_node = new_nodes[-1]
if last_node.get_last_leaf().type == 'newline':
break
new_nodes.pop()
def _update_tos(self, tree_node):
if tree_node.type in ('suite', 'file_input'):
self._tos = _NodesStackNode(tree_node, self._tos)
self._tos.add(list(tree_node.children))
self._update_tos(tree_node.children[-1])
elif _func_or_class_has_suite(tree_node):
self._update_tos(tree_node.children[-1])
if new_nodes:
if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
p = new_nodes[-1].get_next_leaf().prefix
# We are not allowed to remove the newline at the end of the
# line, otherwise it's going to be missing. This happens e.g.
# if a bracket is around before that moves newlines to
# prefixes.
new_prefix = split_lines(p, keepends=True)[0]
if had_valid_suite_last:
last = new_nodes[-1]
if last.type == 'decorated':
last = last.children[-1]
if last.type in ('async_funcdef', 'async_stmt'):
last = last.children[-1]
last_line_offset_leaf = last.children[-2].get_last_leaf()
assert last_line_offset_leaf == ':'
else:
last_line_offset_leaf = new_nodes[-1].get_last_leaf()
tos.add_tree_nodes(prefix, new_nodes, line_offset, last_line_offset_leaf)
prefix = new_prefix
self._prefix_remainder = ''
return new_nodes, working_stack, prefix
def close(self):
while self._tos is not None:
self._close_tos()
self._base_node.finish()
# Add an endmarker.
try:
last_leaf = self._module.get_last_leaf()
end_pos = list(last_leaf.end_pos)
except IndexError:
end_pos = [1, 0]
else:
last_leaf = _skip_dedent_error_leaves(last_leaf)
end_pos = list(last_leaf.end_pos)
lines = split_lines(self.prefix)
assert len(lines) > 0
if len(lines) == 1:
@@ -601,6 +713,6 @@ class _NodesStack(object):
end_pos[0] += len(lines) - 1
end_pos[1] = len(lines[-1])
endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix)
endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
endmarker.parent = self._module
self._module.children.append(endmarker)

View File

@@ -570,11 +570,14 @@ class _BytesAndStringMix(SyntaxRule):
message = "cannot mix bytes and nonbytes literals"
def _is_bytes_literal(self, string):
if string.type == 'fstring':
return False
return 'b' in string.string_prefix.lower()
def is_issue(self, node):
first = node.children[0]
if first.type == 'string' and self._normalizer.version >= (3, 0):
# In Python 2 it's allowed to mix bytes and unicode.
if self._normalizer.version >= (3, 0):
first_is_bytes = self._is_bytes_literal(first)
for string in node.children[1:]:
if first_is_bytes != self._is_bytes_literal(string):

View File

@@ -15,8 +15,6 @@ decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of
# skipping python3.5+ compatibility, in favour of 3.7 solution
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite

parso/python/grammar38.txt (new file)
View File

@@ -0,0 +1,157 @@
# Grammar for Python
# NOTE WELL: You should also follow all the steps listed at
# https://devguide.python.org/grammar/
# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
decorated: decorators (classdef | funcdef | async_funcdef)
async_funcdef: 'async' funcdef
funcdef: 'def' NAME parameters ['->' test] ':' suite
parameters: '(' [typedargslist] ')'
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [',']]]
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
| '**' tfpdef [','])
tfpdef: NAME [':' test]
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']]]
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
| '**' vfpdef [',']
)
vfpdef: NAME
stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
('=' (yield_expr|testlist_star_expr))*)
annassign: ':' test ['=' test]
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
'<<=' | '>>=' | '**=' | '//=')
# For normal and annotated assignments, additional restrictions enforced by the interpreter
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist_star_expr]
yield_stmt: yield_expr
raise_stmt: 'raise' [test ['from' test]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
'import' ('*' | '(' import_as_names ')' | import_as_names))
import_as_name: NAME ['as' NAME]
dotted_as_name: dotted_name ['as' NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
assert_stmt: 'assert' test [',' test]
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite
((except_clause ':' suite)+
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test ['as' NAME]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
test: or_test ['if' or_test 'else' test] | lambdef
test_nocond: or_test | lambdef_nocond
lambdef: 'lambda' [varargslist] ':' test
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
# <> isn't actually a valid comparison operator in Python. It's here for the
# sake of a __future__ import described in PEP 401 (which really works :-)
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
star_expr: '*' expr
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom_expr ['**' factor]
atom_expr: ['await'] atom trailer*
atom: ('(' [yield_expr|testlist_comp] ')' |
'[' [testlist_comp] ']' |
'{' [dictorsetmaker] '}' |
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
testlist: test (',' test)* [',']
dictorsetmaker: ( ((test ':' test | '**' expr)
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
((test | star_expr)
(comp_for | (',' (test | star_expr))* [','])) )
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
arglist: argument (',' argument)* [',']
# The reason that keywords are test nodes instead of NAME is that using NAME
# results in an ambiguity. ast.c makes sure it's a NAME.
# "test '=' test" is really "keyword '=' test", but we have no such token.
# These need to be in a single rule to avoid grammar that is ambiguous
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
# we explicitly match '*' here, too, to give it proper precedence.
# Illegal combinations and orderings are blocked in ast.c:
# multiple (test comp_for) arguments are blocked; keyword unpackings
# that precede iterable unpackings are blocked; etc.
argument: ( test [comp_for] |
test '=' test |
'**' test |
'*' test )
comp_iter: comp_for | comp_if
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
comp_for: ['async'] sync_comp_for
comp_if: 'if' test_nocond [comp_iter]
# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr
strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*

View File

@@ -90,7 +90,7 @@ class Parser(BaseParser):
strictly bottom-up.
"""
try:
return self.node_map[nonterminal](children)
node = self.node_map[nonterminal](children)
except KeyError:
if nonterminal == 'suite':
# We don't want the INDENT/DEDENT in our parser tree. Those
@@ -104,7 +104,10 @@ class Parser(BaseParser):
elif nonterminal == 'listmaker':
# Same as list_if above.
nonterminal = 'testlist_comp'
return self.default_node(nonterminal, children)
node = self.default_node(nonterminal, children)
for c in children:
c.parent = node
return node
def convert_leaf(self, type, value, prefix, start_pos):
# print('leaf', repr(value), token.tok_name[type])
@@ -124,8 +127,9 @@ class Parser(BaseParser):
last_leaf = None
if self._start_nonterminal == 'file_input' and \
(token.type == PythonTokenTypes.ENDMARKER or
token.type == DEDENT and '\n' not in last_leaf.value):
(token.type == PythonTokenTypes.ENDMARKER
or token.type == DEDENT and '\n' not in last_leaf.value
and '\r' not in last_leaf.value):
# In Python statements need to end with a newline. But since it's
# possible (and valid in Python ) that there's no newline at the
# end of a file, we have to recover even if the user doesn't want
@@ -189,7 +193,10 @@ class Parser(BaseParser):
all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]
if all_nodes:
self.stack[start_index - 1].nodes.append(tree.PythonErrorNode(all_nodes))
node = tree.PythonErrorNode(all_nodes)
for n in all_nodes:
n.parent = node
self.stack[start_index - 1].nodes.append(node)
self.stack[start_index:] = []
return bool(all_nodes)
@@ -197,7 +204,6 @@ class Parser(BaseParser):
def _recovery_tokenize(self, tokens):
for token in tokens:
typ = token[0]
# print(tok_name[typ], repr(value), start_pos, repr(prefix))
if typ == DEDENT:
# We need to count indents, because if we just omit any DEDENT,
# we might omit them in the wrong place.

View File

@@ -391,11 +391,11 @@ class PEP8Normalizer(ErrorFinder):
if value.lstrip('#'):
self.add_issue(part, 266, "Too many leading '#' for block comment.")
elif self._on_newline:
if not re.match('#:? ', value) and not value == '#' \
if not re.match(r'#:? ', value) and not value == '#' \
and not (value.startswith('#!') and part.start_pos == (1, 0)):
self.add_issue(part, 265, "Block comment should start with '# '")
else:
if not re.match('#:? [^ ]', value):
if not re.match(r'#:? [^ ]', value):
self.add_issue(part, 262, "Inline comment should start with '# '")
self._reset_newlines(spacing, leaf, is_comment=True)
@@ -677,7 +677,7 @@ class PEP8Normalizer(ErrorFinder):
elif typ == 'string':
# Checking multiline strings
for i, line in enumerate(leaf.value.splitlines()[1:]):
indentation = re.match('[ \t]*', line).group(0)
indentation = re.match(r'[ \t]*', line).group(0)
start_pos = leaf.line + i, len(indentation)
# TODO check multiline indentation.
elif typ == 'endmarker':

View File

@@ -167,13 +167,13 @@ def _create_token_collection(version_info):
FStringStart = group(*fstring_prefixes)
# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
Single = r"(?:\\.|[^'\\])*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
Double = r'(?:\\.|[^"\\])*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')
# Because of leftmost-then-longest match semantics, be sure to put the
@@ -186,7 +186,7 @@ def _create_token_collection(version_info):
Bracket = '[][(){}]'
special_args = [r'\r?\n', r'[:;.,@]']
special_args = [r'\r\n?', r'\n', r'[:;.,@]']
if version_info >= (3, 0):
special_args.insert(0, r'\.\.\.')
Special = group(*special_args)
@@ -194,16 +194,16 @@ def _create_token_collection(version_info):
Funny = group(Operator, Bracket, Special)
# First (or only) line of ' or " string.
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
group("'", r'\\\r?\n'),
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
group('"', r'\\\r?\n'))
ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" +
group("'", r'\\(?:\r\n?|\n)'),
StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' +
group('"', r'\\(?:\r\n?|\n)'))
pseudo_extra_pool = [Comment, Triple]
all_quotes = '"', "'", '"""', "'''"
if fstring_prefixes:
pseudo_extra_pool.append(FStringStart + group(*all_quotes))
PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool)
PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
PseudoToken = group(Whitespace, capture=True) + \
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
@@ -273,6 +273,9 @@ class FStringNode(object):
def close_parentheses(self, character):
self.parentheses_count -= 1
if self.parentheses_count == 0:
# No parentheses means that the format spec is also finished.
self.format_spec_count = 0
def allow_multiline(self):
return len(self.quote) == 3
@@ -281,60 +284,50 @@ class FStringNode(object):
return (self.parentheses_count - self.format_spec_count) > 0
def _check_fstring_ending(fstring_stack, token, from_start=False):
fstring_end = float('inf')
fstring_index = None
for i, node in enumerate(fstring_stack):
if from_start:
if token.startswith(node.quote):
fstring_index = i
fstring_end = len(node.quote)
else:
continue
else:
try:
end = token.index(node.quote)
except ValueError:
pass
else:
if fstring_index is None or end < fstring_end:
fstring_index = i
fstring_end = end
return fstring_index, fstring_end
def _close_fstring_if_necessary(fstring_stack, string, start_pos, additional_prefix):
for fstring_stack_index, node in enumerate(fstring_stack):
if string.startswith(node.quote):
token = PythonToken(
FSTRING_END,
node.quote,
start_pos,
prefix=additional_prefix,
)
additional_prefix = ''
assert not node.previous_lines
del fstring_stack[fstring_stack_index:]
return token, '', len(node.quote)
return None, additional_prefix, 0
def _find_fstring_string(fstring_stack, line, lnum, pos):
def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
tos = fstring_stack[-1]
if tos.is_in_expr():
return '', pos
allow_multiline = tos.allow_multiline()
if allow_multiline:
match = fstring_string_multi_line.match(line, pos)
else:
new_pos = pos
allow_multiline = tos.allow_multiline()
if allow_multiline:
match = fstring_string_multi_line.match(line, pos)
else:
match = fstring_string_single_line.match(line, pos)
if match is None:
string = tos.previous_lines
else:
if not tos.previous_lines:
tos.last_string_start_pos = (lnum, pos)
match = fstring_string_single_line.match(line, pos)
if match is None:
return tos.previous_lines, pos
string = match.group(0)
for fstring_stack_node in fstring_stack:
try:
string = string[:string.index(fstring_stack_node.quote)]
except ValueError:
pass # The string was not found.
if not tos.previous_lines:
tos.last_string_start_pos = (lnum, pos)
new_pos += len(string)
if allow_multiline and string.endswith('\n'):
tos.previous_lines += string
string = ''
else:
string = tos.previous_lines + string
string = match.group(0)
for fstring_stack_node in fstring_stack:
end_match = endpats[fstring_stack_node.quote].match(string)
if end_match is not None:
string = end_match.group(0)[:-len(fstring_stack_node.quote)]
return string, new_pos
new_pos = pos
new_pos += len(string)
if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
tos.previous_lines += string
string = ''
else:
string = tos.previous_lines + string
return string, new_pos
def tokenize(code, version_info, start_pos=(1, 0)):
@@ -349,7 +342,6 @@ def _print_tokens(func):
"""
def wrapper(*args, **kwargs):
for token in func(*args, **kwargs):
print(token)
yield token
return wrapper
@@ -364,6 +356,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
token. This idea comes from lib2to3. The prefix contains all information
that is irrelevant for the parser like newlines in parentheses or comments.
"""
def dedent_if_necessary(start):
while start < indents[-1]:
if start > indents[-2]:
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
break
yield PythonToken(DEDENT, '', spos, '')
indents.pop()
pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
fstring_pattern_map, always_break_tokens, = \
_get_token_collection(version_info)
@@ -416,40 +416,42 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
while pos < max:
if fstring_stack:
string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
if string:
yield PythonToken(
FSTRING_STRING, string,
fstring_stack[-1].last_string_start_pos,
# Never has a prefix because it can start anywhere and
# include whitespace.
prefix=''
)
fstring_stack[-1].previous_lines = ''
continue
if pos == max:
break
tos = fstring_stack[-1]
if not tos.is_in_expr():
string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
if string:
yield PythonToken(
FSTRING_STRING, string,
tos.last_string_start_pos,
# Never has a prefix because it can start anywhere and
# include whitespace.
prefix=''
)
tos.previous_lines = ''
continue
if pos == max:
break
rest = line[pos:]
fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True)
if fstring_index is not None:
yield PythonToken(
FSTRING_END,
fstring_stack[fstring_index].quote,
(lnum, pos),
prefix=additional_prefix,
)
additional_prefix = ''
del fstring_stack[fstring_index:]
pos += end
fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
fstring_stack,
rest,
(lnum, pos),
additional_prefix,
)
pos += quote_length
if fstring_end_token is not None:
yield fstring_end_token
continue
pseudomatch = pseudo_token.match(line, pos)
if not pseudomatch: # scan for tokens
match = whitespace.match(line, pos)
if pos == 0:
for t in dedent_if_necessary(match.end()):
yield t
pos = match.end()
new_line = False
yield PythonToken(
ERRORTOKEN, line[pos], (lnum, pos),
additional_prefix + match.group(0)
@@ -471,40 +473,20 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
break
initial = token[0]
if new_line and initial not in '\r\n#':
if new_line and initial not in '\r\n\\#':
new_line = False
if paren_level == 0 and not fstring_stack:
i = 0
indent_start = start
while line[i] == '\f':
i += 1
# TODO don't we need to change spos as well?
start -= 1
if start > indents[-1]:
indent_start -= 1
if indent_start > indents[-1]:
yield PythonToken(INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
if start > indents[-2]:
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
break
yield PythonToken(DEDENT, '', spos, '')
indents.pop()
if fstring_stack:
fstring_index, end = _check_fstring_ending(fstring_stack, token)
if fstring_index is not None:
if end != 0:
yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
prefix = ''
yield PythonToken(
FSTRING_END,
fstring_stack[fstring_index].quote,
(lnum, spos[1] + 1),
prefix=prefix
)
del fstring_stack[fstring_index:]
pos -= len(token) - end
continue
indents.append(indent_start)
for t in dedent_if_necessary(indent_start):
yield t
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
@@ -535,10 +517,23 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
contstr = line[start:]
contline = line
break
# Check up to the first 3 chars of the token to see if
# they're in the single_quoted set. If so, they start
# a string.
# We're using the first 3, because we're looking for
# "rb'" (for example) at the start of the token. If
# we switch to longer prefixes, this needs to be
# adjusted.
# Note that initial == token[:1].
# Also note that single quote checking must come after
# triple quote checking (above).
elif initial in single_quoted or \
token[:2] in single_quoted or \
token[:3] in single_quoted:
if token[-1] == '\n': # continued string
if token[-1] in '\r\n': # continued string
# This means that a single quoted string ends with a
# backslash and is continued.
contstr_start = lnum, start
endprog = (endpats.get(initial) or endpats.get(token[1])
or endpats.get(token[2]))
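A self-contained sketch of the check described in the comment above; the literal set below only stands in for the single_quoted set built by _get_token_collection, which contains every string prefix plus quote combination:

# Check whether a token starts a single-quoted string, looking at up to its
# first three characters (enough for two-letter prefixes such as rb or fr).
single_quoted = {"'", '"', "r'", 'r"', "b'", 'b"', "rb'", 'rb"'}  # illustrative subset
token = "rb'data'"
initial = token[:1]
is_string_start = (initial in single_quoted
                   or token[:2] in single_quoted
                   or token[:3] in single_quoted)
print(is_string_start)  # True, found by the three-character check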
@@ -554,15 +549,17 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if token in always_break_tokens:
fstring_stack[:] = []
paren_level = 0
while True:
indent = indents.pop()
if indent > start:
yield PythonToken(DEDENT, '', spos, '')
else:
indents.append(indent)
break
# We only want to dedent if the token is on a new line.
if re.match(r'[ \f\t]*$', line[:start]):
while True:
indent = indents.pop()
if indent > start:
yield PythonToken(DEDENT, '', spos, '')
else:
indents.append(indent)
break
yield PythonToken(NAME, token, spos, prefix)
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'): # continued stmt
additional_prefix += prefix + line[start:]
break
else:
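The new "token is on a new line" guard relies on a small regex over everything left of the token. A hedged illustration of that check in isolation:

import re

# Only whitespace and form feeds may precede the token for a dedent to apply.
line = '    pass\n'
print(bool(re.match(r'[ \f\t]*$', line[:4])))   # True, `pass` starts its own line
line = 'x = 1; pass\n'
print(bool(re.match(r'[ \f\t]*$', line[:7])))   # False, `pass` follows other code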
@@ -575,7 +572,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if fstring_stack:
fstring_stack[-1].close_parentheses(token)
else:
paren_level -= 1
if paren_level:
paren_level -= 1
elif token == ':' and fstring_stack \
and fstring_stack[-1].parentheses_count == 1:
fstring_stack[-1].format_spec_count += 1
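The guarded decrement above is what keeps the bracket counter from dropping below zero on stray closing brackets (see test_brackets_no_indentation later in this diff). A stripped-down sketch of the idea:

# Simulate the counter on an unbalanced token stream such as `}` `{` `}`.
paren_level = 0
for op in ['}', '{', '}']:
    if op in '([{':
        paren_level += 1
    elif op in ')]}':
        if paren_level:
            paren_level -= 1      # never goes negative
print(paren_level)  # 0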
@@ -584,7 +582,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
if contstr:
yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
if contstr.endswith('\n'):
if contstr.endswith('\n') or contstr.endswith('\r'):
new_line = True
end_pos = lnum, max


@@ -48,6 +48,7 @@ from parso._compatibility import utf8_repr, unicode
from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
search_ancestor
from parso.python.prefix import split_prefix
from parso.utils import split_lines
_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt',
'with_stmt', 'async_stmt', 'suite'])
@@ -124,11 +125,13 @@ class PythonLeaf(PythonMixin, Leaf):
# indent error leafs somehow? No idea how, though.
previous_leaf = self.get_previous_leaf()
if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
and previous_leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
and previous_leaf.token_type in ('INDENT', 'DEDENT', 'ERROR_DEDENT'):
previous_leaf = previous_leaf.get_previous_leaf()
if previous_leaf is None:
return self.line - self.prefix.count('\n'), 0 # It's the first leaf.
if previous_leaf is None: # It's the first leaf.
lines = split_lines(self.prefix)
# + 1 is needed because split_lines always returns at least [''].
return self.line - len(lines) + 1, 0 # It's the first leaf.
return previous_leaf.end_pos
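A hedged worked example of the "+ 1" arithmetic above, using hypothetical positions: for a first leaf on line 3 whose prefix consists of two blank lines, the prefix starts at line 1, column 0.

from parso.utils import split_lines

prefix = '\n\n'
lines = split_lines(prefix)   # ['', '', ''], split_lines always returns at least ['']
leaf_line = 3                 # hypothetical line of the leaf itself
print(leaf_line - len(lines) + 1)   # 1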
@@ -166,7 +169,9 @@ class EndMarker(_LeafWithoutNewlines):
@utf8_repr
def __repr__(self):
return "<%s: prefix=%s>" % (type(self).__name__, repr(self.prefix))
return "<%s: prefix=%s end_pos=%s>" % (
type(self).__name__, repr(self.prefix), self.end_pos
)
class Newline(PythonLeaf):
@@ -252,7 +257,7 @@ class String(Literal):
@property
def string_prefix(self):
return re.match('\w*(?=[\'"])', self.value).group(0)
return re.match(r'\w*(?=[\'"])', self.value).group(0)
def _get_payload(self):
match = re.search(
@@ -263,7 +268,7 @@ class String(Literal):
return match.group(2)[:-len(match.group(1))]
class FStringString(Leaf):
class FStringString(PythonLeaf):
"""
f-strings contain f-string expressions and normal python strings. These are
the string parts of f-strings.
@@ -272,7 +277,7 @@ class FStringString(Leaf):
__slots__ = ()
class FStringStart(Leaf):
class FStringStart(PythonLeaf):
"""
f-strings contain f-string expressions and normal python strings. These are
the string parts of f-strings.
@@ -281,7 +286,7 @@ class FStringStart(Leaf):
__slots__ = ()
class FStringEnd(Leaf):
class FStringEnd(PythonLeaf):
"""
f-strings contain f-string expressions and normal python strings. These are
the string parts of f-strings.
@@ -964,7 +969,7 @@ class ImportName(Import):
class KeywordStatement(PythonBaseNode):
"""
For the following statements: `assert`, `del`, `global`, `nonlocal`,
`raise`, `return`, `yield`, `return`, `yield`.
`raise`, `return`, `yield`.
`pass`, `continue` and `break` are not in there, because they are just
simple keywords and the parser reduces it to a keyword.


@@ -1,5 +1,7 @@
from abc import abstractmethod, abstractproperty
from parso._compatibility import utf8_repr, encoding, py_version
from parso.utils import split_lines
def search_ancestor(node, *node_types):
@@ -193,7 +195,9 @@ class Leaf(NodeOrLeaf):
def get_start_pos_of_prefix(self):
previous_leaf = self.get_previous_leaf()
if previous_leaf is None:
return self.line - self.prefix.count('\n'), 0 # It's the first leaf.
lines = split_lines(self.prefix)
# + 1 is needed because split_lines always returns at least [''].
return self.line - len(lines) + 1, 0 # It's the first leaf.
return previous_leaf.end_pos
def get_first_leaf(self):
@@ -210,7 +214,7 @@ class Leaf(NodeOrLeaf):
@property
def end_pos(self):
lines = self.value.split('\n')
lines = split_lines(self.value)
end_pos_line = self.line + len(lines) - 1
# Check for multiline token
if self.line == end_pos_line:
@@ -244,8 +248,6 @@ class BaseNode(NodeOrLeaf):
type = None
def __init__(self, children):
for c in children:
c.parent = self
self.children = children
"""
A list of :class:`NodeOrLeaf` child nodes.
@@ -318,7 +320,7 @@ class BaseNode(NodeOrLeaf):
@utf8_repr
def __repr__(self):
code = self.get_code().replace('\n', ' ').strip()
code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip()
if not py_version >= 30:
code = code.encode(encoding, 'replace')
return "<%s: %s@%s,%s>" % \


@@ -5,6 +5,20 @@ from ast import literal_eval
from parso._compatibility import unicode, total_ordering
# The following is a list of characters that are line breaks in
# str.splitlines, but not in Python. In Python only \r (Carriage Return, 0xD)
# and \n (Line Feed, 0xA) are allowed to split lines.
_NON_LINE_BREAKS = (
u'\v', # Vertical Tabulation 0xB
u'\f', # Form Feed 0xC
u'\x1C', # File Separator
u'\x1D', # Group Separator
u'\x1E', # Record Separator
u'\x85', # Next Line (NEL - Equivalent to CR+LF.
# Used to mark end-of-line on some IBM mainframes.)
u'\u2028', # Line Separator
u'\u2029', # Paragraph Separator
)
Version = namedtuple('Version', 'major, minor, micro')
@@ -26,8 +40,13 @@ def split_lines(string, keepends=False):
# We have to merge lines that were broken by form feed characters.
merge = []
for i, line in enumerate(lst):
if line.endswith('\f'):
merge.append(i)
try:
last_chr = line[-1]
except IndexError:
pass
else:
if last_chr in _NON_LINE_BREAKS:
merge.append(i)
for index in reversed(merge):
try:
@@ -41,11 +60,11 @@ def split_lines(string, keepends=False):
# The stdlib's implementation of the end is inconsistent when calling
# it with/without keepends. One time there's an empty string in the
# end, one time there's none.
if string.endswith('\n') or string == '':
if string.endswith('\n') or string.endswith('\r') or string == '':
lst.append('')
return lst
else:
return re.split('\n|\r\n', string)
return re.split(r'\n|\r\n|\r', string)
def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
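For context, a few hedged examples of the behaviour the split_lines changes above produce (mirroring the parametrized tests near the end of this diff): a lone carriage return now splits, while characters such as form feed do not, unlike str.splitlines.

from parso.utils import split_lines

print(split_lines('a\rb'))                 # ['a', 'b']
print(split_lines('a\rb', keepends=True))  # ['a\r', 'b']
print(split_lines('a\fb'))                 # ['a\fb'], form feed is not a Python line break
print('a\fb'.splitlines())                 # ['a', 'b'], but str.splitlines splits on it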


@@ -40,8 +40,16 @@ setup(name='parso',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Text Editors :: Integrated Development Environments (IDE)',
'Topic :: Utilities',
],
extras_require={
'testing': [
'pytest>=3.0.7',
'docopt',
],
},
)


@@ -285,6 +285,14 @@ if sys.version_info >= (3,):
'b"ä"',
# combining strings and unicode is allowed in Python 2.
'"s" b""',
'"s" b"" ""',
'b"" "" b"" ""',
]
if sys.version_info >= (3, 6):
FAILING_EXAMPLES += [
# Same as above, but for f-strings.
'f"s" b""',
'b"s" f""',
]
if sys.version_info >= (2, 7):
# This is something that raises a different error in 2.6 than in the other

test/fuzz_diff_parser.py (new file)

@@ -0,0 +1,290 @@
"""
A script to find bugs in the diff parser.
This script is extremely useful if changes are made to the diff parser. By
running a few thousand iterations, we can assure that the diff parser is in
good shape.
Usage:
fuzz_diff_parser.py [--pdb|--ipdb] [-l] [-n=<nr>] [-x=<nr>] random [<path>]
fuzz_diff_parser.py [--pdb|--ipdb] [-l] redo [-o=<nr>] [-p]
fuzz_diff_parser.py -h | --help
Options:
-h --help Show this screen
-n, --maxtries=<nr> Maximum number of random tries [default: 1000]
-x, --changes=<nr> Number of changes to be done to a file per try [default: 5]
-l, --logging Prints all the logs
-o, --only-last=<nr> Only runs the last n iterations; Defaults to running all
-p, --print-code Print all test diffs
--pdb Launch pdb when error is raised
--ipdb Launch ipdb when error is raised
"""
from __future__ import print_function
import logging
import sys
import os
import random
import pickle
import parso
from parso.utils import split_lines
from test.test_diff_parser import _check_error_leaves_nodes
_latest_grammar = parso.load_grammar(version='3.8')
_python_reserved_strings = tuple(
# Keywords are usually only interesting in combination with spaces after
# them. We don't put a space before keywords, to avoid indentation errors.
s + (' ' if s.isalpha() else '')
for s in _latest_grammar._pgen_grammar.reserved_syntax_strings.keys()
)
_random_python_fragments = _python_reserved_strings + (
' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'",
"'''", ';', ' some_random_word ', '\\', '#',
)
def find_python_files_in_tree(file_path):
if not os.path.isdir(file_path):
yield file_path
return
for root, dirnames, filenames in os.walk(file_path):
for name in filenames:
if name.endswith('.py'):
yield os.path.join(root, name)
def _print_copyable_lines(lines):
for line in lines:
line = repr(line)[1:-1]
if line.endswith(r'\n'):
line = line[:-2] + '\n'
print(line, end='')
def _get_first_error_start_pos_or_none(module):
error_leaf = _check_error_leaves_nodes(module)
return None if error_leaf is None else error_leaf.start_pos
class LineReplacement:
def __init__(self, line_nr, new_line):
self._line_nr = line_nr
self._new_line = new_line
def apply(self, code_lines):
# print(repr(self._new_line))
code_lines[self._line_nr] = self._new_line
class LineDeletion:
def __init__(self, line_nr):
self.line_nr = line_nr
def apply(self, code_lines):
del code_lines[self.line_nr]
class LineCopy:
def __init__(self, copy_line, insertion_line):
self._copy_line = copy_line
self._insertion_line = insertion_line
def apply(self, code_lines):
code_lines.insert(
self._insertion_line,
# Use some line from the file. This doesn't feel totally
# random, but for the diff parser it will feel like it.
code_lines[self._copy_line]
)
class FileModification:
@classmethod
def generate(cls, code_lines, change_count):
return cls(
list(cls._generate_line_modifications(code_lines, change_count)),
# work with changed trees more than with normal ones.
check_original=random.random() > 0.8,
)
@staticmethod
def _generate_line_modifications(lines, change_count):
def random_line(include_end=False):
return random.randint(0, len(lines) - (not include_end))
lines = list(lines)
for _ in range(change_count):
rand = random.randint(1, 4)
if rand == 1:
if len(lines) == 1:
# We cannot delete every line, that doesn't make sense to
# fuzz and it would be annoying to rewrite everything here.
continue
l = LineDeletion(random_line())
elif rand == 2:
# Copy / Insertion
# Make it possible to insert into the first and the last line
l = LineCopy(random_line(), random_line(include_end=True))
elif rand in (3, 4):
# Modify a line in some weird random ways.
line_nr = random_line()
line = lines[line_nr]
column = random.randint(0, len(line))
random_string = ''
for _ in range(random.randint(1, 3)):
if random.random() > 0.8:
# The lower characters cause way more issues.
unicode_range = 0x1f if random.randint(0, 1) else 0x3000
random_string += chr(random.randint(0, unicode_range))
else:
# These insertions let us understand how random
# keyword/operator insertions work. Theoretically this
# could also be done with unicode insertions, but the
# fuzzer is just way more effective here.
random_string += random.choice(_random_python_fragments)
if random.random() > 0.5:
# In this case we insert at a very random place that
# probably breaks syntax.
line = line[:column] + random_string + line[column:]
else:
# Here we have better chances to not break syntax, because
# we really replace the line with something that has
# indentation.
line = ' ' * random.randint(0, 12) + random_string + '\n'
l = LineReplacement(line_nr, line)
l.apply(lines)
yield l
def __init__(self, modification_list, check_original):
self._modification_list = modification_list
self._check_original = check_original
def _apply(self, code_lines):
changed_lines = list(code_lines)
for modification in self._modification_list:
modification.apply(changed_lines)
return changed_lines
def run(self, grammar, code_lines, print_code):
code = ''.join(code_lines)
modified_lines = self._apply(code_lines)
modified_code = ''.join(modified_lines)
if print_code:
if self._check_original:
print('Original:')
_print_copyable_lines(code_lines)
print('\nModified:')
_print_copyable_lines(modified_lines)
print()
if self._check_original:
m = grammar.parse(code, diff_cache=True)
start1 = _get_first_error_start_pos_or_none(m)
grammar.parse(modified_code, diff_cache=True)
if self._check_original:
# Also check if it's possible to "revert" the changes.
m = grammar.parse(code, diff_cache=True)
start2 = _get_first_error_start_pos_or_none(m)
assert start1 == start2, (start1, start2)
class FileTests:
def __init__(self, file_path, test_count, change_count):
self._path = file_path
with open(file_path) as f:
code = f.read()
self._code_lines = split_lines(code, keepends=True)
self._test_count = test_count
self._code_lines = self._code_lines
self._change_count = change_count
self._file_modifications = []
def _run(self, grammar, file_modifications, debugger, print_code=False):
try:
for i, fm in enumerate(file_modifications, 1):
fm.run(grammar, self._code_lines, print_code=print_code)
print('.', end='')
sys.stdout.flush()
print()
except Exception:
print("Issue in file: %s" % self._path)
if debugger:
einfo = sys.exc_info()
pdb = __import__(debugger)
pdb.post_mortem(einfo[2])
raise
def redo(self, grammar, debugger, only_last, print_code):
mods = self._file_modifications
if only_last is not None:
mods = mods[-only_last:]
self._run(grammar, mods, debugger, print_code=print_code)
def run(self, grammar, debugger):
def iterate():
for _ in range(self._test_count):
fm = FileModification.generate(self._code_lines, self._change_count)
self._file_modifications.append(fm)
yield fm
self._run(grammar, iterate(), debugger)
def main(arguments):
debugger = 'pdb' if arguments['--pdb'] else \
'ipdb' if arguments['--ipdb'] else None
redo_file = os.path.join(os.path.dirname(__file__), 'fuzz-redo.pickle')
if arguments['--logging']:
root = logging.getLogger()
root.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
root.addHandler(ch)
grammar = parso.load_grammar()
parso.python.diff.DEBUG_DIFF_PARSER = True
if arguments['redo']:
with open(redo_file, 'rb') as f:
file_tests_obj = pickle.load(f)
only_last = arguments['--only-last'] and int(arguments['--only-last'])
file_tests_obj.redo(
grammar,
debugger,
only_last=only_last,
print_code=arguments['--print-code']
)
elif arguments['random']:
# A random file is used to do diff parser checks if no file is given.
# This helps us to find errors in a lot of different files.
file_paths = list(find_python_files_in_tree(arguments['<path>'] or '.'))
max_tries = int(arguments['--maxtries'])
tries = 0
try:
while tries < max_tries:
path = random.choice(file_paths)
print("Checking %s: %s tries" % (path, tries))
now_tries = min(1000, max_tries - tries)
file_tests_obj = FileTests(path, now_tries, int(arguments['--changes']))
file_tests_obj.run(grammar, debugger)
tries += now_tries
except Exception:
with open(redo_file, 'wb') as f:
pickle.dump(file_tests_obj, f)
raise
else:
raise NotImplementedError('Command is not implemented')
if __name__ == '__main__':
from docopt import docopt
arguments = docopt(__doc__)
main(arguments)


@@ -1,14 +1,18 @@
# -*- coding: utf-8 -*-
from textwrap import dedent
import logging
import sys
import pytest
from parso.utils import split_lines
from parso import cache
from parso import load_grammar
from parso.python.diff import DiffParser
from parso.python.diff import DiffParser, _assert_valid_graph
from parso import parse
ANY = object()
def test_simple():
"""
@@ -21,7 +25,7 @@ def test_simple():
def _check_error_leaves_nodes(node):
if node.type in ('error_leaf', 'error_node'):
return True
return node
try:
children = node.children
@@ -29,23 +33,10 @@ def _check_error_leaves_nodes(node):
pass
else:
for child in children:
if _check_error_leaves_nodes(child):
return True
return False
def _assert_valid_graph(node):
"""
Checks if the parent/children relationship is correct.
"""
try:
children = node.children
except AttributeError:
return
for child in children:
assert child.parent == node
_assert_valid_graph(child)
x_node = _check_error_leaves_nodes(child)
if x_node is not None:
return x_node
return None
class Differ(object):
@@ -60,6 +51,8 @@ class Differ(object):
self.lines = split_lines(code, keepends=True)
self.module = parse(code, diff_cache=True, cache=True)
assert code == self.module.get_code()
_assert_valid_graph(self.module)
return self.module
def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
@@ -73,11 +66,15 @@ class Differ(object):
new_module = diff_parser.update(self.lines, lines)
self.lines = lines
assert code == new_module.get_code()
assert diff_parser._copy_count == copies
#assert diff_parser._parser_count == parsers
assert expect_error_leaves == _check_error_leaves_nodes(new_module)
_assert_valid_graph(new_module)
error_node = _check_error_leaves_nodes(new_module)
assert expect_error_leaves == (error_node is not None), error_node
if parsers is not ANY:
assert diff_parser._parser_count == parsers
if copies is not ANY:
assert diff_parser._copy_count == copies
return new_module
@@ -122,7 +119,7 @@ def test_positions(differ):
m = differ.parse('a\n\n', parsers=1)
assert m.end_pos == (3, 0)
m = differ.parse('a\n\n ', copies=1, parsers=1)
m = differ.parse('a\n\n ', copies=1, parsers=2)
assert m.end_pos == (3, 1)
m = differ.parse('a ', parsers=1)
assert m.end_pos == (1, 2)
@@ -138,7 +135,7 @@ def test_if_simple(differ):
differ.initialize(src + 'a')
differ.parse(src + else_ + "a", copies=0, parsers=1)
differ.parse(else_, parsers=1, expect_error_leaves=True)
differ.parse(else_, parsers=1, copies=1, expect_error_leaves=True)
differ.parse(src + else_, parsers=1)
@@ -208,7 +205,7 @@ def test_open_parentheses(differ):
differ.parse(new_code, parsers=1, expect_error_leaves=True)
new_code = 'a = 1\n' + new_code
differ.parse(new_code, copies=1, parsers=1, expect_error_leaves=True)
differ.parse(new_code, parsers=2, expect_error_leaves=True)
func += 'def other_func():\n pass\n'
differ.initialize('isinstance(\n' + func)
@@ -222,6 +219,7 @@ def test_open_parentheses_at_end(differ):
differ.initialize(code)
differ.parse(code, parsers=1, expect_error_leaves=True)
def test_backslash(differ):
src = dedent(r"""
a = 1\
@@ -255,7 +253,7 @@ def test_backslash(differ):
def test_full_copy(differ):
code = 'def foo(bar, baz):\n pass\n bar'
differ.initialize(code)
differ.parse(code, copies=1, parsers=1)
differ.parse(code, copies=1)
def test_wrong_whitespace(differ):
@@ -263,10 +261,10 @@ def test_wrong_whitespace(differ):
hello
'''
differ.initialize(code)
differ.parse(code + 'bar\n ', parsers=1)
differ.parse(code + 'bar\n ', parsers=3)
code += """abc(\npass\n """
differ.parse(code, parsers=1, copies=1, expect_error_leaves=True)
differ.parse(code, parsers=2, copies=1, expect_error_leaves=True)
def test_issues_with_error_leaves(differ):
@@ -367,7 +365,7 @@ def test_totally_wrong_whitespace(differ):
'''
differ.initialize(code1)
differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True)
differ.parse(code2, parsers=4, copies=0, expect_error_leaves=True)
def test_node_insertion(differ):
@@ -466,6 +464,9 @@ def test_in_parentheses_newlines(differ):
b = 2""")
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=1)
def test_indentation_issue(differ):
code1 = dedent("""
@@ -483,7 +484,7 @@ def test_indentation_issue(differ):
""")
differ.initialize(code1)
differ.parse(code2, parsers=2)
differ.parse(code2, parsers=1)
def test_endmarker_newline(differ):
@@ -501,7 +502,7 @@ def test_endmarker_newline(differ):
code2 = code1.replace('codet', 'coded')
differ.initialize(code1)
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
def test_newlines_at_end(differ):
@@ -517,7 +518,7 @@ def test_end_newline_with_decorator(differ):
json.l''')
differ.initialize(code)
module = differ.parse(code + '\n', copies=1)
module = differ.parse(code + '\n', copies=1, parsers=1)
decorated, endmarker = module.children
assert decorated.type == 'decorated'
decorator, func = decorated.children
@@ -526,3 +527,760 @@ def test_end_newline_with_decorator(differ):
newline, first_stmt, second_stmt = suite.children
assert first_stmt.get_code() == ' import json\n'
assert second_stmt.get_code() == ' json.l\n'
def test_invalid_to_valid_nodes(differ):
code1 = dedent('''\
def a():
foo = 3
def b():
la = 3
else:
la
return
foo
base
''')
code2 = dedent('''\
def a():
foo = 3
def b():
la = 3
if foo:
latte = 3
else:
la
return
foo
base
''')
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=3)
def test_if_removal_and_reappearence(differ):
code1 = dedent('''\
la = 3
if foo:
latte = 3
else:
la
pass
''')
code2 = dedent('''\
la = 3
latte = 3
else:
la
pass
''')
code3 = dedent('''\
la = 3
if foo:
latte = 3
else:
la
''')
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=4, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=1)
differ.parse(code3, parsers=1, copies=1)
def test_add_error_indentation(differ):
code = 'if x:\n 1\n'
differ.initialize(code)
differ.parse(code + ' 2\n', parsers=1, copies=0, expect_error_leaves=True)
def test_differing_docstrings(differ):
code1 = dedent('''\
def foobar(x, y):
1
return x
def bazbiz():
foobar()
lala
''')
code2 = dedent('''\
def foobar(x, y):
2
return x + y
def bazbiz():
z = foobar()
lala
''')
differ.initialize(code1)
differ.parse(code2, parsers=3, copies=1)
differ.parse(code1, parsers=3, copies=1)
def test_one_call_in_function_change(differ):
code1 = dedent('''\
def f(self):
mro = [self]
for a in something:
yield a
def g(self):
return C(
a=str,
b=self,
)
''')
code2 = dedent('''\
def f(self):
mro = [self]
def g(self):
return C(
a=str,
t
b=self,
)
''')
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
differ.parse(code1, parsers=2, copies=1)
def test_function_deletion(differ):
code1 = dedent('''\
class C(list):
def f(self):
def iterate():
for x in b:
break
return list(iterate())
''')
code2 = dedent('''\
class C():
def f(self):
for x in b:
break
return list(iterate())
''')
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=0, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=0)
def test_docstring_removal(differ):
code1 = dedent('''\
class E(Exception):
"""
1
2
3
"""
class S(object):
@property
def f(self):
return cmd
def __repr__(self):
return cmd2
''')
code2 = dedent('''\
class E(Exception):
"""
1
3
"""
class S(object):
@property
def f(self):
return cmd
return cmd2
''')
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=2)
differ.parse(code1, parsers=2, copies=1)
def test_paren_in_strange_position(differ):
code1 = dedent('''\
class C:
""" ha """
def __init__(self, message):
self.message = message
''')
code2 = dedent('''\
class C:
""" ha """
)
def __init__(self, message):
self.message = message
''')
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=2, expect_error_leaves=True)
differ.parse(code1, parsers=0, copies=2)
def insert_line_into_code(code, index, line):
lines = split_lines(code, keepends=True)
lines.insert(index, line)
return ''.join(lines)
def test_paren_before_docstring(differ):
code1 = dedent('''\
# comment
"""
The
"""
from parso import tree
from parso import python
''')
code2 = insert_line_into_code(code1, 1, ' ' * 16 + 'raise InternalParseError(\n')
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
differ.parse(code1, parsers=2, copies=1)
def test_parentheses_before_method(differ):
code1 = dedent('''\
class A:
def a(self):
pass
class B:
def b(self):
if 1:
pass
''')
code2 = dedent('''\
class A:
def a(self):
pass
Exception.__init__(self, "x" %
def b(self):
if 1:
pass
''')
differ.initialize(code1)
differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=1)
def test_indentation_issues(differ):
code1 = dedent('''\
class C:
def f():
1
if 2:
return 3
def g():
to_be_removed
pass
''')
code2 = dedent('''\
class C:
def f():
1
``something``, very ``weird``).
if 2:
return 3
def g():
to_be_removed
pass
''')
code3 = dedent('''\
class C:
def f():
1
if 2:
return 3
def g():
pass
''')
differ.initialize(code1)
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
differ.parse(code1, copies=2)
differ.parse(code3, parsers=2, copies=1)
differ.parse(code1, parsers=1, copies=2)
def test_error_dedent_issues(differ):
code1 = dedent('''\
while True:
try:
1
except KeyError:
if 2:
3
except IndexError:
4
5
''')
code2 = dedent('''\
while True:
try:
except KeyError:
1
except KeyError:
if 2:
3
except IndexError:
4
something_inserted
5
''')
differ.initialize(code1)
differ.parse(code2, parsers=6, copies=2, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=0)
def test_random_text_insertion(differ):
code1 = dedent('''\
class C:
def f():
return node
def g():
try:
1
except KeyError:
2
''')
code2 = dedent('''\
class C:
def f():
return node
Some'random text: yeah
for push in plan.dfa_pushes:
def g():
try:
1
except KeyError:
2
''')
differ.initialize(code1)
differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=1)
def test_many_nested_ifs(differ):
code1 = dedent('''\
class C:
def f(self):
def iterate():
if 1:
yield t
else:
yield
return
def g():
3
''')
code2 = dedent('''\
def f(self):
def iterate():
if 1:
yield t
hahahaha
if 2:
else:
yield
return
def g():
3
''')
differ.initialize(code1)
differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=1)
@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5")
@pytest.mark.parametrize('prefix', ['', 'async '])
def test_with_and_funcdef_in_call(differ, prefix):
code1 = prefix + dedent('''\
with x:
la = C(
a=1,
b=2,
c=3,
)
''')
code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')
differ.initialize(code1)
differ.parse(code2, parsers=3, expect_error_leaves=True)
differ.parse(code1, parsers=1)
def test_wrong_backslash(differ):
code1 = dedent('''\
def y():
1
for x in y:
continue
''')
code2 = insert_line_into_code(code1, 3, '\\.whl$\n')
differ.initialize(code1)
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
differ.parse(code1, parsers=1, copies=1)
def test_comment_change(differ):
differ.initialize('')
def test_random_unicode_characters(differ):
"""
Those issues were all found with the fuzzer.
"""
differ.initialize('')
differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
differ.parse(u'\r\r', parsers=1)
differ.parse(u"˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1)
s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):'
differ.parse(s, parsers=1, expect_error_leaves=True)
differ.parse('')
differ.parse(s + '\n', parsers=1, expect_error_leaves=True)
differ.parse(u' result = (\r\f\x17\t\x11res)', parsers=2, expect_error_leaves=True)
differ.parse('')
differ.parse(' a( # xx\ndef', parsers=2, expect_error_leaves=True)
@pytest.mark.skipif(sys.version_info < (2, 7), reason="No set literals in Python 2.6")
def test_dedent_end_positions(differ):
code1 = dedent('''\
if 1:
if b:
2
c = {
5}
''')
code2 = dedent('''\
if 1:
if ⌟ഒᜈྡྷṭb:
2
'l': ''}
c = {
5}
''')
differ.initialize(code1)
differ.parse(code2, parsers=1, expect_error_leaves=True)
differ.parse(code1, parsers=1)
def test_special_no_newline_ending(differ):
code1 = dedent('''\
1
''')
code2 = dedent('''\
1
is ''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
differ.parse(code1, copies=1, parsers=0)
def test_random_character_insertion(differ):
code1 = dedent('''\
def create(self):
1
if self.path is not None:
return
# 3
# 4
''')
code2 = dedent('''\
def create(self):
1
if 2:
x return
# 3
# 4
''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=3, expect_error_leaves=True)
differ.parse(code1, copies=1, parsers=1)
def test_import_opening_bracket(differ):
code1 = dedent('''\
1
2
from bubu import (X,
''')
code2 = dedent('''\
11
2
from bubu import (X,
''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
differ.parse(code1, copies=1, parsers=2, expect_error_leaves=True)
def test_opening_bracket_at_end(differ):
code1 = dedent('''\
class C:
1
[
''')
code2 = dedent('''\
3
class C:
1
[
''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True)
def test_all_sorts_of_indentation(differ):
code1 = dedent('''\
class C:
1
def f():
'same'
if foo:
a = b
end
''')
code2 = dedent('''\
class C:
1
def f(yield await %|(
'same'
\x02\x06\x0f\x1c\x11
if foo:
a = b
end
''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=4, expect_error_leaves=True)
differ.parse(code1, copies=1, parsers=3)
code3 = dedent('''\
if 1:
a
b
c
d
\x00
''')
differ.parse(code3, parsers=2, expect_error_leaves=True)
differ.parse('')
def test_dont_copy_dedents_in_beginning(differ):
code1 = dedent('''\
a
4
''')
code2 = dedent('''\
1
2
3
4
''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
differ.parse(code1, parsers=2)
def test_dont_copy_error_leaves(differ):
code1 = dedent('''\
def f(n):
x
if 2:
3
''')
code2 = dedent('''\
def f(n):
def if 1:
indent
x
if 2:
3
''')
differ.initialize(code1)
differ.parse(code2, parsers=1, expect_error_leaves=True)
differ.parse(code1, parsers=2)
def test_error_dedent_in_between(differ):
code1 = dedent('''\
class C:
def f():
a
if something:
x
z
''')
code2 = dedent('''\
class C:
def f():
a
dedent
if other_thing:
b
if something:
x
z
''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
differ.parse(code1, copies=1, parsers=2)
def test_some_other_indentation_issues(differ):
code1 = dedent('''\
class C:
x
def f():
""
copied
a
''')
code2 = dedent('''\
try:
de
a
b
c
d
def f():
""
copied
a
''')
differ.initialize(code1)
differ.parse(code2, copies=2, parsers=1, expect_error_leaves=True)
differ.parse(code1, copies=2, parsers=2)
def test_open_bracket_case1(differ):
code1 = dedent('''\
class C:
1
2 # ha
''')
code2 = insert_line_into_code(code1, 2, ' [str\n')
code3 = insert_line_into_code(code2, 4, ' str\n')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
differ.parse(code3, copies=1, parsers=1, expect_error_leaves=True)
differ.parse(code1, copies=1, parsers=1)
def test_open_bracket_case2(differ):
code1 = dedent('''\
class C:
def f(self):
(
b
c
def g(self):
d
''')
code2 = dedent('''\
class C:
def f(self):
(
b
c
self.
def g(self):
d
''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
differ.parse(code1, copies=2, parsers=0, expect_error_leaves=True)
def test_some_weird_removals(differ):
code1 = dedent('''\
class C:
1
''')
code2 = dedent('''\
class C:
1
@property
A
return
# x
omega
''')
code3 = dedent('''\
class C:
1
;
omega
''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
differ.parse(code3, copies=1, parsers=2, expect_error_leaves=True)
differ.parse(code1, copies=1)
@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5")
def test_async_copy(differ):
code1 = dedent('''\
async def main():
x = 3
print(
''')
code2 = dedent('''\
async def main():
x = 3
print()
''')
differ.initialize(code1)
differ.parse(code2, copies=1, parsers=1)
differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True)


@@ -79,11 +79,17 @@ def test_tokenize_start_pos(code, positions):
assert positions == [p.start_pos for p in tokens]
def test_roundtrip(grammar):
code = dedent("""\
f'''s{
str.uppe
'''
""")
@pytest.mark.parametrize(
'code', [
dedent("""\
f'''s{
str.uppe
'''
"""),
'f"foo',
'f"""foo',
]
)
def test_roundtrip(grammar, code):
tree = grammar.parse(code)
assert tree.get_code() == code


@@ -106,14 +106,15 @@ def test_end_newlines():
@pytest.mark.parametrize(('code', 'types'), [
('\r', ['error_leaf', 'endmarker']),
('\n\r', ['error_leaf', 'endmarker'])
('\r', ['endmarker']),
('\n\r', ['endmarker'])
])
def test_carriage_return_at_end(code, types):
"""
By adding an artificial newline this creates weird side effects for
\r at the end of files that would normally be error leafs.
By adding an artificial newline this created weird side effects for
\r at the end of files.
"""
tree = parse(code)
assert tree.get_code() == code
assert [c.type for c in tree.children] == types
assert tree.end_pos == (len(code) + 1, 0)


@@ -258,6 +258,11 @@ def test_too_many_levels_of_indentation():
@pytest.mark.parametrize(
'code', [
"f'{*args,}'",
r'f"\""',
r'f"\\\""',
r'fr"\""',
r'fr"\\\""',
r"print(f'Some {x:.2f} and some {y}')",
]
)
def test_valid_fstrings(code):
@@ -267,6 +272,8 @@ def test_valid_fstrings(code):
@pytest.mark.parametrize(
('code', 'message'), [
("f'{1+}'", ('invalid syntax')),
(r'f"\"', ('invalid syntax')),
(r'fr"\"', ('invalid syntax')),
]
)
def test_invalid_fstrings(code, message):


@@ -23,11 +23,13 @@ OP = PythonTokenTypes.OP
ENDMARKER = PythonTokenTypes.ENDMARKER
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
FSTRING_START = PythonTokenTypes.FSTRING_START
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
FSTRING_END = PythonTokenTypes.FSTRING_END
def _get_token_list(string):
def _get_token_list(string, version=None):
# Load the current version.
version_info = parse_version_string()
version_info = parse_version_string(version)
return list(tokenize.tokenize(string, version_info))
@@ -197,11 +199,12 @@ def test_ur_literals():
def test_error_literal():
error_token, endmarker = _get_token_list('"\n')
error_token, newline, endmarker = _get_token_list('"\n')
assert error_token.type == ERRORTOKEN
assert error_token.string == '"'
assert newline.type == NEWLINE
assert endmarker.type == ENDMARKER
assert endmarker.prefix == '\n'
assert endmarker.prefix == ''
bracket, error_token, endmarker = _get_token_list('( """')
assert error_token.type == ERRORTOKEN
@@ -240,9 +243,102 @@ def test_indentation(code, types):
def test_error_string():
t1, endmarker = _get_token_list(' "\n')
t1, newline, endmarker = _get_token_list(' "\n')
assert t1.type == ERRORTOKEN
assert t1.prefix == ' '
assert t1.string == '"'
assert endmarker.prefix == '\n'
assert newline.type == NEWLINE
assert endmarker.prefix == ''
assert endmarker.string == ''
def test_indent_error_recovery():
code = dedent("""\
str(
from x import a
def
""")
lst = _get_token_list(code)
expected = [
# `str(`
INDENT, NAME, OP,
# `from parso`
NAME, NAME,
# `import a` on same line as the previous from parso
NAME, NAME, NEWLINE,
# Dedent happens, because there's an import now and the import
# statement "breaks" out of the opening paren on the first line.
DEDENT,
# `b`
NAME, NEWLINE, ENDMARKER]
assert [t.type for t in lst] == expected
def test_error_token_after_dedent():
code = dedent("""\
class C:
pass
$foo
""")
lst = _get_token_list(code)
expected = [
NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
# $foo\n
ERRORTOKEN, NAME, NEWLINE, ENDMARKER
]
assert [t.type for t in lst] == expected
def test_brackets_no_indentation():
"""
There used to be an issue that the parentheses counting would go below
zero. This should not happen.
"""
code = dedent("""\
}
{
}
""")
lst = _get_token_list(code)
assert [t.type for t in lst] == [OP, NEWLINE, OP, OP, NEWLINE, ENDMARKER]
def test_form_feed():
error_token, endmarker = _get_token_list(dedent('''\
\f"""'''))
assert error_token.prefix == '\f'
assert error_token.string == '"""'
assert endmarker.prefix == ''
def test_carriage_return():
lst = _get_token_list(' =\\\rclass')
assert [t.type for t in lst] == [INDENT, OP, DEDENT, NAME, ENDMARKER]
def test_backslash():
code = '\\\n# 1 \n'
endmarker, = _get_token_list(code)
assert endmarker.prefix == code
@pytest.mark.parametrize(
('code', 'types'), [
('f"', [FSTRING_START]),
('f""', [FSTRING_START, FSTRING_END]),
('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]),
('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
(r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
(r'print(f"Some {x:.2f}a{y}")', [
NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
]),
]
)
def test_fstring(code, types, version_ge_py36):
actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
assert types + [ENDMARKER] == actual_types


@@ -3,21 +3,42 @@ from codecs import BOM_UTF8
from parso.utils import split_lines, python_bytes_to_unicode
import parso
def test_split_lines_no_keepends():
assert split_lines('asd\r\n') == ['asd', '']
assert split_lines('asd\r\n\f') == ['asd', '\f']
assert split_lines('\fasd\r\n') == ['\fasd', '']
assert split_lines('') == ['']
assert split_lines('\n') == ['', '']
import pytest
def test_split_lines_keepends():
assert split_lines('asd\r\n', keepends=True) == ['asd\r\n', '']
assert split_lines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
assert split_lines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
assert split_lines('', keepends=True) == ['']
assert split_lines('\n', keepends=True) == ['\n', '']
@pytest.mark.parametrize(
('string', 'expected_result', 'keepends'), [
('asd\r\n', ['asd', ''], False),
('asd\r\n', ['asd\r\n', ''], True),
('asd\r', ['asd', ''], False),
('asd\r', ['asd\r', ''], True),
('asd\n', ['asd', ''], False),
('asd\n', ['asd\n', ''], True),
('asd\r\n\f', ['asd', '\f'], False),
('asd\r\n\f', ['asd\r\n', '\f'], True),
('\fasd\r\n', ['\fasd', ''], False),
('\fasd\r\n', ['\fasd\r\n', ''], True),
('', [''], False),
('', [''], True),
('\n', ['', ''], False),
('\n', ['\n', ''], True),
('\r', ['', ''], False),
('\r', ['\r', ''], True),
# Invalid line breaks
('a\vb', ['a\vb'], False),
('a\vb', ['a\vb'], True),
('\x1C', ['\x1C'], False),
('\x1C', ['\x1C'], True),
]
)
def test_split_lines(string, expected_result, keepends):
assert split_lines(string, keepends=keepends) == expected_result
def test_python_bytes_to_unicode_unicode_text():

tox.ini

@@ -1,23 +1,19 @@
[tox]
envlist = py27, py33, py34, py35, py36, py37
envlist = py27, py33, py34, py35, py36, py37, pypy
[testenv]
extras = testing
deps =
{env:_SETUPTOOLS_DEP:setuptools}
{env:_PARSO_TEST_PYTEST_DEP:pytest>=3.0.7}
# For --lf and --ff.
pytest-cache
py26,py33: pytest>=3.0.7,<3.3
py26,py33: setuptools<37
setenv =
# https://github.com/tomchristie/django-rest-framework/issues/1957
# tox corrupts __pycache__, solution from here:
PYTHONDONTWRITEBYTECODE=1
py26,py33: _PARSO_TEST_PYTEST_DEP=pytest>=3.0.7,<3.3
py26,py33: _SETUPTOOLS_DEP=setuptools<37
commands =
pytest {posargs:parso test}
[testenv:cov]
deps =
coverage
{[testenv]deps}
commands =
coverage run --source parso -m pytest
coverage report