mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-07 21:34:32 +08:00
Compare commits
276 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f1ee7614c9 | ||
|
|
58850f8bfa | ||
|
|
d38a60278e | ||
|
|
6c65aea47d | ||
|
|
0d37ff865c | ||
|
|
076e296497 | ||
|
|
a2b153e3c1 | ||
|
|
bb2855897b | ||
|
|
9c9e6ffede | ||
|
|
b5d8175eaa | ||
|
|
32a83b932a | ||
|
|
01ae01a382 | ||
|
|
5fbc207892 | ||
|
|
60e4591837 | ||
|
|
ef56debb78 | ||
|
|
dc2582f488 | ||
|
|
fe69989fbc | ||
|
|
ce8b531175 | ||
|
|
069c08883a | ||
|
|
0da0a8655a | ||
|
|
3d890c3a00 | ||
|
|
956ea55048 | ||
|
|
0bd17bee2c | ||
|
|
f3015efb2d | ||
|
|
197391dc53 | ||
|
|
32321a74b1 | ||
|
|
52d01685ba | ||
|
|
e591b929eb | ||
|
|
dac4c445a7 | ||
|
|
20fd32b45d | ||
|
|
9cc8178998 | ||
|
|
1e25445176 | ||
|
|
d7171ae927 | ||
|
|
d3d28480ed | ||
|
|
564be7882e | ||
|
|
76c5754b76 | ||
|
|
55247a5a2c | ||
|
|
7ae1efe5c7 | ||
|
|
01dba7f8ce | ||
|
|
ea8a758051 | ||
|
|
a7e24a37e7 | ||
|
|
f80d9de7a0 | ||
|
|
eaee2b9ca0 | ||
|
|
dd1761da96 | ||
|
|
e10802ab09 | ||
|
|
3d402d0a77 | ||
|
|
f6a8b997f2 | ||
|
|
94c2681c8e | ||
|
|
610a820799 | ||
|
|
57320af6eb | ||
|
|
574e1c63e8 | ||
|
|
fbaad7883f | ||
|
|
b1f613fe16 | ||
|
|
f4696a6245 | ||
|
|
48c1a0e590 | ||
|
|
6f63147f69 | ||
|
|
94bd48bae1 | ||
|
|
edbceba4f8 | ||
|
|
b33c2b3ae1 | ||
|
|
65a0748f4f | ||
|
|
c442cf98be | ||
|
|
65b15b05e3 | ||
|
|
26aee1c6a9 | ||
|
|
c88a862bae | ||
|
|
d6b0585933 | ||
|
|
6eba40b4c5 | ||
|
|
428bde0573 | ||
|
|
d1d866f6c6 | ||
|
|
a8ec75fedd | ||
|
|
deaf1f310b | ||
|
|
2a881bf875 | ||
|
|
4d713f56e9 | ||
|
|
d202fdea49 | ||
|
|
5e6d5dec59 | ||
|
|
c1846dd082 | ||
|
|
5da51720cd | ||
|
|
fde64d0eae | ||
|
|
430f13af5e | ||
|
|
96ae6a078b | ||
|
|
a9f58b7c45 | ||
|
|
e0d0e57bd0 | ||
|
|
d2542983e9 | ||
|
|
64cf24d9da | ||
|
|
02f48a68f2 | ||
|
|
c7c464e5e9 | ||
|
|
29325d3052 | ||
|
|
750b8af37b | ||
|
|
0126a38bd1 | ||
|
|
c2985c111e | ||
|
|
45f9d4b204 | ||
|
|
f99fe6ad21 | ||
|
|
a64c32bb2a | ||
|
|
e5fb1927bb | ||
|
|
0ef4809377 | ||
|
|
29456a6c0a | ||
|
|
ada84ed063 | ||
|
|
1c7b078db0 | ||
|
|
930ec08ab0 | ||
|
|
a90622040d | ||
|
|
98c02f7d79 | ||
|
|
d6d6c5038f | ||
|
|
3be8ac7786 | ||
|
|
96f1582b6e | ||
|
|
7064ecf3fb | ||
|
|
e6bc924fba | ||
|
|
59605438e9 | ||
|
|
e7f71a3eba | ||
|
|
3f7aad84f9 | ||
|
|
52e3db4834 | ||
|
|
0daf4d9068 | ||
|
|
29b6232541 | ||
|
|
e05d7fd59f | ||
|
|
7f964c26f2 | ||
|
|
ff67de248f | ||
|
|
1af5d9d46b | ||
|
|
fce3ead829 | ||
|
|
55d5d39c53 | ||
|
|
c8bf23b787 | ||
|
|
98c9a1ec7f | ||
|
|
ecdb90d9bc | ||
|
|
375ebf2181 | ||
|
|
badb2fe010 | ||
|
|
8e118c913c | ||
|
|
52fc8fc569 | ||
|
|
97cdb448d4 | ||
|
|
603b67ee6d | ||
|
|
7686273287 | ||
|
|
692436ba12 | ||
|
|
f7d3d4e82f | ||
|
|
edce279dee | ||
|
|
a9e40eb578 | ||
|
|
b14f518306 | ||
|
|
8407894b25 | ||
|
|
e4efebc9f3 | ||
|
|
f66e47c540 | ||
|
|
706a92ee0d | ||
|
|
91d864b23d | ||
|
|
e20f2069ba | ||
|
|
4cf198285a | ||
|
|
30cf491b4f | ||
|
|
c1675da0cb | ||
|
|
7b7b66eb3c | ||
|
|
5d46c3e18b | ||
|
|
e9fde82512 | ||
|
|
a46ecbb499 | ||
|
|
da5aa8a2ab | ||
|
|
43d4a8a834 | ||
|
|
309033ae2d | ||
|
|
2a9d8632fe | ||
|
|
530a324643 | ||
|
|
71003bc20e | ||
|
|
c5d141bf60 | ||
|
|
e958b241c7 | ||
|
|
34ab35558f | ||
|
|
03de9cebb8 | ||
|
|
6098d89150 | ||
|
|
ff4358cd97 | ||
|
|
b5378e4602 | ||
|
|
33e321a539 | ||
|
|
a890ddd6cc | ||
|
|
1362d4f05d | ||
|
|
532aef2342 | ||
|
|
878b4b2d3b | ||
|
|
87299335c4 | ||
|
|
4f0e9c0fd7 | ||
|
|
67ca091631 | ||
|
|
4e5ba02dbb | ||
|
|
a85f544901 | ||
|
|
9e8066c6fd | ||
|
|
68eab72229 | ||
|
|
d9264609f2 | ||
|
|
79c7e0b59d | ||
|
|
f03a87b876 | ||
|
|
2a082d69df | ||
|
|
e6fc739670 | ||
|
|
12e11b3d16 | ||
|
|
cc8038966b | ||
|
|
31aecf2d35 | ||
|
|
d8554d86d1 | ||
|
|
d691bf0fd1 | ||
|
|
5712ffb5ca | ||
|
|
55d6a69aad | ||
|
|
453471eeb6 | ||
|
|
a06c3a3129 | ||
|
|
73ce57428b | ||
|
|
640f544af9 | ||
|
|
b6cbf306d7 | ||
|
|
95e4ecf592 | ||
|
|
fbed1ecfe0 | ||
|
|
1f27fa9320 | ||
|
|
23362ec2d3 | ||
|
|
6b391af071 | ||
|
|
c43cb21a0e | ||
|
|
24346a0d32 | ||
|
|
9d452ec66a | ||
|
|
567e0d7aed | ||
|
|
1f02327cff | ||
|
|
8c348aee6f | ||
|
|
a277ccf288 | ||
|
|
a5ce2caab6 | ||
|
|
da4df9c0f1 | ||
|
|
bd444df417 | ||
|
|
275dbca1b9 | ||
|
|
9a0b6f4928 | ||
|
|
fc5560874b | ||
|
|
6e5a520e7b | ||
|
|
dcabf3d415 | ||
|
|
3bc82d112d | ||
|
|
ec186a78f8 | ||
|
|
3818fb2b22 | ||
|
|
95ddeb4012 | ||
|
|
f638abb08e | ||
|
|
f8558df27a | ||
|
|
bae56e72e1 | ||
|
|
41c38311f7 | ||
|
|
eeb456a6d4 | ||
|
|
1c0956d9e0 | ||
|
|
c17156bd36 | ||
|
|
8865aa452c | ||
|
|
e0c79a9fcc | ||
|
|
3c08b1b058 | ||
|
|
0f32673092 | ||
|
|
1e18163402 | ||
|
|
cef9f1bdbd | ||
|
|
23db71a5f7 | ||
|
|
34154d05a0 | ||
|
|
6f385bdba1 | ||
|
|
4fc31c58b3 | ||
|
|
689decc66c | ||
|
|
c2eacdb81c | ||
|
|
ac0bf4fcdd | ||
|
|
948f9ccecc | ||
|
|
f20106d88e | ||
|
|
f4912f6c17 | ||
|
|
bf5a4b7c2c | ||
|
|
579146b501 | ||
|
|
deb4dbce1c | ||
|
|
8eda8decea | ||
|
|
f6935935c0 | ||
|
|
d3fa7e1cad | ||
|
|
83d9abd036 | ||
|
|
222e9117b4 | ||
|
|
eda2207e6c | ||
|
|
a91e5f2775 | ||
|
|
cba4f2ccc1 | ||
|
|
8f1a436ba1 | ||
|
|
9941348ec6 | ||
|
|
afb71dc762 | ||
|
|
0d96b12566 | ||
|
|
9d2ce4bcd4 | ||
|
|
a3e280c2b9 | ||
|
|
7c7f4f4e54 | ||
|
|
56b3e2cdc8 | ||
|
|
97f042c6ba | ||
|
|
b1aa7c6a79 | ||
|
|
235fda3fbb | ||
|
|
d8d2e596a5 | ||
|
|
e05ce5ae31 | ||
|
|
25e4ea9c24 | ||
|
|
9f88fe16a3 | ||
|
|
ba0e7a2e9d | ||
|
|
dc80152ff8 | ||
|
|
9e3154d167 | ||
|
|
065da34272 | ||
|
|
f89809de9a | ||
|
|
332c57ebcb | ||
|
|
acb173b703 | ||
|
|
47e78b37fe | ||
|
|
fc44af6165 | ||
|
|
73439d5863 | ||
|
|
085aad3038 | ||
|
|
7db500bfbc | ||
|
|
e689f3dce6 | ||
|
|
b076cdc12a | ||
|
|
0dea94c801 | ||
|
|
6cf487aee2 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -9,3 +9,5 @@
|
|||||||
/dist/
|
/dist/
|
||||||
parso.egg-info/
|
parso.egg-info/
|
||||||
/.cache/
|
/.cache/
|
||||||
|
/.pytest_cache
|
||||||
|
test/fuzz-redo.pickle
|
||||||
|
|||||||
@@ -3,18 +3,17 @@ sudo: false
|
|||||||
python:
|
python:
|
||||||
- 2.6
|
- 2.6
|
||||||
- 2.7
|
- 2.7
|
||||||
- 3.3
|
|
||||||
- 3.4
|
- 3.4
|
||||||
- 3.5
|
- 3.5
|
||||||
- 3.6
|
- 3.6
|
||||||
- 3.7
|
|
||||||
- pypy
|
- pypy
|
||||||
matrix:
|
matrix:
|
||||||
allow_failures:
|
|
||||||
- env: TOXENV=cov
|
|
||||||
include:
|
include:
|
||||||
|
- { python: "3.7", dist: xenial, sudo: true }
|
||||||
- python: 3.5
|
- python: 3.5
|
||||||
env: TOXENV=cov
|
env: TOXENV=cov
|
||||||
|
allow_failures:
|
||||||
|
- env: TOXENV=cov
|
||||||
install:
|
install:
|
||||||
- pip install --quiet tox-travis
|
- pip install --quiet tox-travis
|
||||||
script:
|
script:
|
||||||
|
|||||||
@@ -3,6 +3,42 @@
|
|||||||
Changelog
|
Changelog
|
||||||
---------
|
---------
|
||||||
|
|
||||||
|
0.3.3 (2018-02-06)
|
||||||
|
+++++++++++++++++++
|
||||||
|
|
||||||
|
- Fix async errors in the diff parser
|
||||||
|
- A fix in iter_errors
|
||||||
|
- This is a very small bugfix release
|
||||||
|
|
||||||
|
0.3.2 (2018-01-24)
|
||||||
|
+++++++++++++++++++
|
||||||
|
|
||||||
|
- 20+ bugfixes in the diff parser and 3 in the tokenizer
|
||||||
|
- A fuzzer for the diff parser, to give confidence that the diff parser is in a
|
||||||
|
good shape.
|
||||||
|
- Some bugfixes for f-string
|
||||||
|
|
||||||
|
0.3.1 (2018-07-09)
|
||||||
|
+++++++++++++++++++
|
||||||
|
|
||||||
|
- Bugfixes in the diff parser and keyword-only arguments
|
||||||
|
|
||||||
|
0.3.0 (2018-06-30)
|
||||||
|
+++++++++++++++++++
|
||||||
|
|
||||||
|
- Rewrote the pgen2 parser generator.
|
||||||
|
|
||||||
|
0.2.1 (2018-05-21)
|
||||||
|
+++++++++++++++++++
|
||||||
|
|
||||||
|
- A bugfix for the diff parser.
|
||||||
|
- Grammar files can now be loaded from a specific path.
|
||||||
|
|
||||||
|
0.2.0 (2018-04-15)
|
||||||
|
+++++++++++++++++++
|
||||||
|
|
||||||
|
- f-strings are now parsed as a part of the normal Python grammar. This makes
|
||||||
|
it way easier to deal with them.
|
||||||
|
|
||||||
0.1.1 (2017-11-05)
|
0.1.1 (2017-11-05)
|
||||||
+++++++++++++++++++
|
+++++++++++++++++++
|
||||||
|
|||||||
15
README.rst
15
README.rst
@@ -2,12 +2,13 @@
|
|||||||
parso - A Python Parser
|
parso - A Python Parser
|
||||||
###################################################################
|
###################################################################
|
||||||
|
|
||||||
.. image:: https://secure.travis-ci.org/davidhalter/parso.png?branch=master
|
|
||||||
:target: http://travis-ci.org/davidhalter/parso
|
|
||||||
:alt: Travis-CI build status
|
|
||||||
|
|
||||||
.. image:: https://coveralls.io/repos/davidhalter/parso/badge.png?branch=master
|
.. image:: https://travis-ci.org/davidhalter/parso.svg?branch=master
|
||||||
:target: https://coveralls.io/r/davidhalter/parso
|
:target: https://travis-ci.org/davidhalter/parso
|
||||||
|
:alt: Travis CI build status
|
||||||
|
|
||||||
|
.. image:: https://coveralls.io/repos/github/davidhalter/parso/badge.svg?branch=master
|
||||||
|
:target: https://coveralls.io/github/davidhalter/parso?branch=master
|
||||||
:alt: Coverage Status
|
:alt: Coverage Status
|
||||||
|
|
||||||
.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png
|
.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png
|
||||||
@@ -55,10 +56,10 @@ To list multiple issues:
|
|||||||
Resources
|
Resources
|
||||||
=========
|
=========
|
||||||
|
|
||||||
- `Testing <http://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
|
- `Testing <https://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
|
||||||
- `PyPI <https://pypi.python.org/pypi/parso>`_
|
- `PyPI <https://pypi.python.org/pypi/parso>`_
|
||||||
- `Docs <https://parso.readthedocs.org/en/latest/>`_
|
- `Docs <https://parso.readthedocs.org/en/latest/>`_
|
||||||
- Uses `semantic versioning <http://semver.org/>`_
|
- Uses `semantic versioning <https://semver.org/>`_
|
||||||
|
|
||||||
Installation
|
Installation
|
||||||
============
|
============
|
||||||
|
|||||||
@@ -57,6 +57,8 @@ def pytest_generate_tests(metafunc):
|
|||||||
metafunc.parametrize('each_py2_version', VERSIONS_2)
|
metafunc.parametrize('each_py2_version', VERSIONS_2)
|
||||||
elif 'each_py3_version' in metafunc.fixturenames:
|
elif 'each_py3_version' in metafunc.fixturenames:
|
||||||
metafunc.parametrize('each_py3_version', VERSIONS_3)
|
metafunc.parametrize('each_py3_version', VERSIONS_3)
|
||||||
|
elif 'version_ge_py36' in metafunc.fixturenames:
|
||||||
|
metafunc.parametrize('version_ge_py36', ['3.6', '3.7'])
|
||||||
|
|
||||||
|
|
||||||
class NormalizerIssueCase(object):
|
class NormalizerIssueCase(object):
|
||||||
@@ -151,8 +153,5 @@ def works_ge_py3(each_version):
|
|||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def works_ge_py35(each_version):
|
def works_ge_py35(each_version):
|
||||||
"""
|
|
||||||
Works only greater equal Python 3.3.
|
|
||||||
"""
|
|
||||||
version_info = parse_version_string(each_version)
|
version_info = parse_version_string(each_version)
|
||||||
return Checker(each_version, version_info >= (3, 5))
|
return Checker(each_version, version_info >= (3, 5))
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ if [[ $tag_ref ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
git tag $tag
|
git tag -a $tag
|
||||||
git push --tags
|
git push --tags
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
1
docs/_themes/flask/layout.html
vendored
1
docs/_themes/flask/layout.html
vendored
@@ -19,7 +19,6 @@
|
|||||||
{% endblock %}
|
{% endblock %}
|
||||||
{%- block footer %}
|
{%- block footer %}
|
||||||
<div class="footer">
|
<div class="footer">
|
||||||
© Copyright {{ copyright }}.
|
|
||||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a>.
|
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a>.
|
||||||
</div>
|
</div>
|
||||||
{% if pagename == 'index' %}
|
{% if pagename == 'index' %}
|
||||||
|
|||||||
@@ -13,7 +13,6 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import datetime
|
|
||||||
|
|
||||||
# If extensions (or modules to document with autodoc) are in another directory,
|
# If extensions (or modules to document with autodoc) are in another directory,
|
||||||
# add these directories to sys.path here. If the directory is relative to the
|
# add these directories to sys.path here. If the directory is relative to the
|
||||||
@@ -45,7 +44,7 @@ master_doc = 'index'
|
|||||||
|
|
||||||
# General information about the project.
|
# General information about the project.
|
||||||
project = u'parso'
|
project = u'parso'
|
||||||
copyright = u'2012 - {today.year}, parso contributors'.format(today=datetime.date.today())
|
copyright = u'parso contributors'
|
||||||
|
|
||||||
import parso
|
import parso
|
||||||
from parso.utils import version_info
|
from parso.utils import version_info
|
||||||
@@ -145,7 +144,7 @@ html_sidebars = {
|
|||||||
#'relations.html',
|
#'relations.html',
|
||||||
'ghbuttons.html',
|
'ghbuttons.html',
|
||||||
#'sourcelink.html',
|
#'sourcelink.html',
|
||||||
#'searchbox.html'
|
'searchbox.html'
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -61,6 +61,8 @@ Used By
|
|||||||
-------
|
-------
|
||||||
|
|
||||||
- jedi_ (which is used by IPython and a lot of editor plugins).
|
- jedi_ (which is used by IPython and a lot of editor plugins).
|
||||||
|
- mutmut_ (mutation tester)
|
||||||
|
|
||||||
|
|
||||||
.. _jedi: https://github.com/davidhalter/jedi
|
.. _jedi: https://github.com/davidhalter/jedi
|
||||||
|
.. _mutmut: https://github.com/boxed/mutmut
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
|
|||||||
from parso.utils import split_lines, python_bytes_to_unicode
|
from parso.utils import split_lines, python_bytes_to_unicode
|
||||||
|
|
||||||
|
|
||||||
__version__ = '0.1.1'
|
__version__ = '0.3.3'
|
||||||
|
|
||||||
|
|
||||||
def parse(code=None, **kwargs):
|
def parse(code=None, **kwargs):
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ except AttributeError:
|
|||||||
def u(string):
|
def u(string):
|
||||||
"""Cast to unicode DAMMIT!
|
"""Cast to unicode DAMMIT!
|
||||||
Written because Python2 repr always implicitly casts to a string, so we
|
Written because Python2 repr always implicitly casts to a string, so we
|
||||||
have to cast back to a unicode (and we now that we always deal with valid
|
have to cast back to a unicode (and we know that we always deal with valid
|
||||||
unicode, because we check that in the beginning).
|
unicode, because we check that in the beginning).
|
||||||
"""
|
"""
|
||||||
if py_version >= 30:
|
if py_version >= 30:
|
||||||
|
|||||||
@@ -2,17 +2,16 @@ import hashlib
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from parso._compatibility import FileNotFoundError, is_pypy
|
from parso._compatibility import FileNotFoundError, is_pypy
|
||||||
from parso.pgen2.pgen import generate_grammar
|
from parso.pgen2 import generate_grammar
|
||||||
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
|
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
|
||||||
from parso.python.diff import DiffParser
|
from parso.python.diff import DiffParser
|
||||||
from parso.python.tokenize import tokenize_lines, tokenize
|
from parso.python.tokenize import tokenize_lines, tokenize
|
||||||
from parso.python import token
|
from parso.python.token import PythonTokenTypes
|
||||||
from parso.cache import parser_cache, load_module, save_module
|
from parso.cache import parser_cache, load_module, save_module
|
||||||
from parso.parser import BaseParser
|
from parso.parser import BaseParser
|
||||||
from parso.python.parser import Parser as PythonParser
|
from parso.python.parser import Parser as PythonParser
|
||||||
from parso.python.errors import ErrorFinderConfig
|
from parso.python.errors import ErrorFinderConfig
|
||||||
from parso.python import pep8
|
from parso.python import pep8
|
||||||
from parso.python import fstring
|
|
||||||
|
|
||||||
_loaded_grammars = {}
|
_loaded_grammars = {}
|
||||||
|
|
||||||
@@ -21,7 +20,7 @@ class Grammar(object):
|
|||||||
"""
|
"""
|
||||||
:py:func:`parso.load_grammar` returns instances of this class.
|
:py:func:`parso.load_grammar` returns instances of this class.
|
||||||
|
|
||||||
Creating custom grammars by calling this is not supported, yet.
|
Creating custom none-python grammars by calling this is not supported, yet.
|
||||||
"""
|
"""
|
||||||
#:param text: A BNF representation of your grammar.
|
#:param text: A BNF representation of your grammar.
|
||||||
_error_normalizer_config = None
|
_error_normalizer_config = None
|
||||||
@@ -52,8 +51,8 @@ class Grammar(object):
|
|||||||
it is invalid, it will be returned as an error node. If disabled,
|
it is invalid, it will be returned as an error node. If disabled,
|
||||||
you will get a ParseError when encountering syntax errors in your
|
you will get a ParseError when encountering syntax errors in your
|
||||||
code.
|
code.
|
||||||
:param str start_symbol: The grammar symbol that you want to parse. Only
|
:param str start_symbol: The grammar rule (nonterminal) that you want
|
||||||
allowed to be used when error_recovery is False.
|
to parse. Only allowed to be used when error_recovery is False.
|
||||||
:param str path: The path to the file you want to open. Only needed for caching.
|
:param str path: The path to the file you want to open. Only needed for caching.
|
||||||
:param bool cache: Keeps a copy of the parser tree in RAM and on disk
|
:param bool cache: Keeps a copy of the parser tree in RAM and on disk
|
||||||
if a path is given. Returns the cached trees if the corresponding
|
if a path is given. Returns the cached trees if the corresponding
|
||||||
@@ -73,7 +72,7 @@ class Grammar(object):
|
|||||||
:py:class:`parso.python.tree.Module`.
|
:py:class:`parso.python.tree.Module`.
|
||||||
"""
|
"""
|
||||||
if 'start_pos' in kwargs:
|
if 'start_pos' in kwargs:
|
||||||
raise TypeError("parse() got an unexpected keyworda argument.")
|
raise TypeError("parse() got an unexpected keyword argument.")
|
||||||
return self._parse(code=code, **kwargs)
|
return self._parse(code=code, **kwargs)
|
||||||
|
|
||||||
def _parse(self, code=None, error_recovery=True, path=None,
|
def _parse(self, code=None, error_recovery=True, path=None,
|
||||||
@@ -89,7 +88,7 @@ class Grammar(object):
|
|||||||
raise TypeError("Please provide either code or a path.")
|
raise TypeError("Please provide either code or a path.")
|
||||||
|
|
||||||
if start_symbol is None:
|
if start_symbol is None:
|
||||||
start_symbol = self._start_symbol
|
start_symbol = self._start_nonterminal
|
||||||
|
|
||||||
if error_recovery and start_symbol != 'file_input':
|
if error_recovery and start_symbol != 'file_input':
|
||||||
raise NotImplementedError("This is currently not implemented.")
|
raise NotImplementedError("This is currently not implemented.")
|
||||||
@@ -137,7 +136,7 @@ class Grammar(object):
|
|||||||
p = self._parser(
|
p = self._parser(
|
||||||
self._pgen_grammar,
|
self._pgen_grammar,
|
||||||
error_recovery=error_recovery,
|
error_recovery=error_recovery,
|
||||||
start_symbol=start_symbol
|
start_nonterminal=start_symbol
|
||||||
)
|
)
|
||||||
root_node = p.parse(tokens=tokens)
|
root_node = p.parse(tokens=tokens)
|
||||||
|
|
||||||
@@ -186,17 +185,16 @@ class Grammar(object):
|
|||||||
normalizer.walk(node)
|
normalizer.walk(node)
|
||||||
return normalizer.issues
|
return normalizer.issues
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
labels = self._pgen_grammar.number2symbol.values()
|
nonterminals = self._pgen_grammar.nonterminal_to_dfas.keys()
|
||||||
txt = ' '.join(list(labels)[:3]) + ' ...'
|
txt = ' '.join(list(nonterminals)[:3]) + ' ...'
|
||||||
return '<%s:%s>' % (self.__class__.__name__, txt)
|
return '<%s:%s>' % (self.__class__.__name__, txt)
|
||||||
|
|
||||||
|
|
||||||
class PythonGrammar(Grammar):
|
class PythonGrammar(Grammar):
|
||||||
_error_normalizer_config = ErrorFinderConfig()
|
_error_normalizer_config = ErrorFinderConfig()
|
||||||
_token_namespace = token
|
_token_namespace = PythonTokenTypes
|
||||||
_start_symbol = 'file_input'
|
_start_nonterminal = 'file_input'
|
||||||
|
|
||||||
def __init__(self, version_info, bnf_text):
|
def __init__(self, version_info, bnf_text):
|
||||||
super(PythonGrammar, self).__init__(
|
super(PythonGrammar, self).__init__(
|
||||||
@@ -215,46 +213,19 @@ class PythonGrammar(Grammar):
|
|||||||
return tokenize(code, self.version_info)
|
return tokenize(code, self.version_info)
|
||||||
|
|
||||||
|
|
||||||
class PythonFStringGrammar(Grammar):
|
|
||||||
_token_namespace = fstring.TokenNamespace
|
|
||||||
_start_symbol = 'fstring'
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
super(PythonFStringGrammar, self).__init__(
|
|
||||||
text=fstring.GRAMMAR,
|
|
||||||
tokenizer=fstring.tokenize,
|
|
||||||
parser=fstring.Parser
|
|
||||||
)
|
|
||||||
|
|
||||||
def parse(self, code, **kwargs):
|
|
||||||
return self._parse(code, **kwargs)
|
|
||||||
|
|
||||||
def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
|
|
||||||
tokens = self._tokenizer(code, start_pos=start_pos)
|
|
||||||
p = self._parser(
|
|
||||||
self._pgen_grammar,
|
|
||||||
error_recovery=error_recovery,
|
|
||||||
start_symbol=self._start_symbol,
|
|
||||||
)
|
|
||||||
return p.parse(tokens=tokens)
|
|
||||||
|
|
||||||
def parse_leaf(self, leaf, error_recovery=True):
|
|
||||||
code = leaf._get_payload()
|
|
||||||
return self.parse(code, error_recovery=True, start_pos=leaf.start_pos)
|
|
||||||
|
|
||||||
|
|
||||||
def load_grammar(**kwargs):
|
def load_grammar(**kwargs):
|
||||||
"""
|
"""
|
||||||
Loads a :py:class:`parso.Grammar`. The default version is the current Python
|
Loads a :py:class:`parso.Grammar`. The default version is the current Python
|
||||||
version.
|
version.
|
||||||
|
|
||||||
:param str version: A python version string, e.g. ``version='3.3'``.
|
:param str version: A python version string, e.g. ``version='3.3'``.
|
||||||
|
:param str path: A path to a grammar file
|
||||||
"""
|
"""
|
||||||
def load_grammar(language='python', version=None):
|
def load_grammar(language='python', version=None, path=None):
|
||||||
if language == 'python':
|
if language == 'python':
|
||||||
version_info = parse_version_string(version)
|
version_info = parse_version_string(version)
|
||||||
|
|
||||||
file = os.path.join(
|
file = path or os.path.join(
|
||||||
'python',
|
'python',
|
||||||
'grammar%s%s.txt' % (version_info.major, version_info.minor)
|
'grammar%s%s.txt' % (version_info.major, version_info.minor)
|
||||||
)
|
)
|
||||||
@@ -273,10 +244,6 @@ def load_grammar(**kwargs):
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
message = "Python version %s is currently not supported." % version
|
message = "Python version %s is currently not supported." % version
|
||||||
raise NotImplementedError(message)
|
raise NotImplementedError(message)
|
||||||
elif language == 'python-f-string':
|
|
||||||
if version is not None:
|
|
||||||
raise NotImplementedError("Currently different versions are not supported.")
|
|
||||||
return PythonFStringGrammar()
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("No support for language %s." % language)
|
raise NotImplementedError("No support for language %s." % language)
|
||||||
|
|
||||||
|
|||||||
173
parso/parser.py
173
parso/parser.py
@@ -1,3 +1,11 @@
|
|||||||
|
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||||
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
|
|
||||||
|
# Modifications:
|
||||||
|
# Copyright David Halter and Contributors
|
||||||
|
# Modifications are dual-licensed: MIT and PSF.
|
||||||
|
# 99% of the code is different from pgen2, now.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
The ``Parser`` tries to convert the available Python code in an easy to read
|
The ``Parser`` tries to convert the available Python code in an easy to read
|
||||||
format, something like an abstract syntax tree. The classes who represent this
|
format, something like an abstract syntax tree. The classes who represent this
|
||||||
@@ -16,7 +24,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
|
|||||||
``Statement``, which produces ``Array`` and ``Call``).
|
``Statement``, which produces ``Array`` and ``Call``).
|
||||||
"""
|
"""
|
||||||
from parso import tree
|
from parso import tree
|
||||||
from parso.pgen2.parse import PgenParser
|
from parso.pgen2.generator import ReservedString
|
||||||
|
|
||||||
|
|
||||||
class ParserSyntaxError(Exception):
|
class ParserSyntaxError(Exception):
|
||||||
@@ -30,7 +38,76 @@ class ParserSyntaxError(Exception):
|
|||||||
self.error_leaf = error_leaf
|
self.error_leaf = error_leaf
|
||||||
|
|
||||||
|
|
||||||
|
class InternalParseError(Exception):
|
||||||
|
"""
|
||||||
|
Exception to signal the parser is stuck and error recovery didn't help.
|
||||||
|
Basically this shouldn't happen. It's a sign that something is really
|
||||||
|
wrong.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, msg, type_, value, start_pos):
|
||||||
|
Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
|
||||||
|
(msg, type_.name, value, start_pos))
|
||||||
|
self.msg = msg
|
||||||
|
self.type = type
|
||||||
|
self.value = value
|
||||||
|
self.start_pos = start_pos
|
||||||
|
|
||||||
|
|
||||||
|
class Stack(list):
|
||||||
|
def _allowed_transition_names_and_token_types(self):
|
||||||
|
def iterate():
|
||||||
|
# An API just for Jedi.
|
||||||
|
for stack_node in reversed(self):
|
||||||
|
for transition in stack_node.dfa.transitions:
|
||||||
|
if isinstance(transition, ReservedString):
|
||||||
|
yield transition.value
|
||||||
|
else:
|
||||||
|
yield transition # A token type
|
||||||
|
|
||||||
|
if not stack_node.dfa.is_final:
|
||||||
|
break
|
||||||
|
|
||||||
|
return list(iterate())
|
||||||
|
|
||||||
|
|
||||||
|
class StackNode(object):
|
||||||
|
def __init__(self, dfa):
|
||||||
|
self.dfa = dfa
|
||||||
|
self.nodes = []
|
||||||
|
|
||||||
|
@property
|
||||||
|
def nonterminal(self):
|
||||||
|
return self.dfa.from_rule
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)
|
||||||
|
|
||||||
|
|
||||||
|
def _token_to_transition(grammar, type_, value):
|
||||||
|
# Map from token to label
|
||||||
|
if type_.contains_syntax:
|
||||||
|
# Check for reserved words (keywords)
|
||||||
|
try:
|
||||||
|
return grammar.reserved_syntax_strings[value]
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return type_
|
||||||
|
|
||||||
|
|
||||||
class BaseParser(object):
|
class BaseParser(object):
|
||||||
|
"""Parser engine.
|
||||||
|
|
||||||
|
A Parser instance contains state pertaining to the current token
|
||||||
|
sequence, and should not be used concurrently by different threads
|
||||||
|
to parse separate token sequences.
|
||||||
|
|
||||||
|
See python/tokenize.py for how to get input tokens by a string.
|
||||||
|
|
||||||
|
When a syntax error occurs, error_recovery() is called.
|
||||||
|
"""
|
||||||
|
|
||||||
node_map = {}
|
node_map = {}
|
||||||
default_node = tree.Node
|
default_node = tree.Node
|
||||||
|
|
||||||
@@ -38,41 +115,97 @@ class BaseParser(object):
|
|||||||
}
|
}
|
||||||
default_leaf = tree.Leaf
|
default_leaf = tree.Leaf
|
||||||
|
|
||||||
def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False):
|
def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
|
||||||
self._pgen_grammar = pgen_grammar
|
self._pgen_grammar = pgen_grammar
|
||||||
self._start_symbol = start_symbol
|
self._start_nonterminal = start_nonterminal
|
||||||
self._error_recovery = error_recovery
|
self._error_recovery = error_recovery
|
||||||
|
|
||||||
def parse(self, tokens):
|
def parse(self, tokens):
|
||||||
start_number = self._pgen_grammar.symbol2number[self._start_symbol]
|
first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
|
||||||
self.pgen_parser = PgenParser(
|
self.stack = Stack([StackNode(first_dfa)])
|
||||||
self._pgen_grammar, self.convert_node, self.convert_leaf,
|
|
||||||
self.error_recovery, start_number
|
for token in tokens:
|
||||||
|
self._add_token(token)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
tos = self.stack[-1]
|
||||||
|
if not tos.dfa.is_final:
|
||||||
|
# We never broke out -- EOF is too soon -- Unfinished statement.
|
||||||
|
# However, the error recovery might have added the token again, if
|
||||||
|
# the stack is empty, we're fine.
|
||||||
|
raise InternalParseError(
|
||||||
|
"incomplete input", token.type, token.value, token.start_pos
|
||||||
)
|
)
|
||||||
|
|
||||||
node = self.pgen_parser.parse(tokens)
|
if len(self.stack) > 1:
|
||||||
# The stack is empty now, we don't need it anymore.
|
self._pop()
|
||||||
del self.pgen_parser
|
else:
|
||||||
return node
|
return self.convert_node(tos.nonterminal, tos.nodes)
|
||||||
|
|
||||||
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
def error_recovery(self, token):
|
||||||
add_token_callback):
|
|
||||||
if self._error_recovery:
|
if self._error_recovery:
|
||||||
raise NotImplementedError("Error Recovery is not implemented")
|
raise NotImplementedError("Error Recovery is not implemented")
|
||||||
else:
|
else:
|
||||||
error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix)
|
type_, value, start_pos, prefix = token
|
||||||
|
error_leaf = tree.ErrorLeaf(type_, value, start_pos, prefix)
|
||||||
raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
|
raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
|
||||||
|
|
||||||
def convert_node(self, pgen_grammar, type_, children):
|
def convert_node(self, nonterminal, children):
|
||||||
# TODO REMOVE symbol, we don't want type here.
|
|
||||||
symbol = pgen_grammar.number2symbol[type_]
|
|
||||||
try:
|
try:
|
||||||
return self.node_map[symbol](children)
|
node = self.node_map[nonterminal](children)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return self.default_node(symbol, children)
|
node = self.default_node(nonterminal, children)
|
||||||
|
for c in children:
|
||||||
|
c.parent = node
|
||||||
|
return node
|
||||||
|
|
||||||
def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
|
def convert_leaf(self, type_, value, prefix, start_pos):
|
||||||
try:
|
try:
|
||||||
return self.leaf_map[type_](value, start_pos, prefix)
|
return self.leaf_map[type_](value, start_pos, prefix)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return self.default_leaf(value, start_pos, prefix)
|
return self.default_leaf(value, start_pos, prefix)
|
||||||
|
|
||||||
|
def _add_token(self, token):
|
||||||
|
"""
|
||||||
|
This is the only core function for parsing. Here happens basically
|
||||||
|
everything. Everything is well prepared by the parser generator and we
|
||||||
|
only apply the necessary steps here.
|
||||||
|
"""
|
||||||
|
grammar = self._pgen_grammar
|
||||||
|
stack = self.stack
|
||||||
|
type_, value, start_pos, prefix = token
|
||||||
|
transition = _token_to_transition(grammar, type_, value)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
plan = stack[-1].dfa.transitions[transition]
|
||||||
|
break
|
||||||
|
except KeyError:
|
||||||
|
if stack[-1].dfa.is_final:
|
||||||
|
self._pop()
|
||||||
|
else:
|
||||||
|
self.error_recovery(token)
|
||||||
|
return
|
||||||
|
except IndexError:
|
||||||
|
raise InternalParseError("too much input", type_, value, start_pos)
|
||||||
|
|
||||||
|
stack[-1].dfa = plan.next_dfa
|
||||||
|
|
||||||
|
for push in plan.dfa_pushes:
|
||||||
|
stack.append(StackNode(push))
|
||||||
|
|
||||||
|
leaf = self.convert_leaf(type_, value, prefix, start_pos)
|
||||||
|
stack[-1].nodes.append(leaf)
|
||||||
|
|
||||||
|
def _pop(self):
|
||||||
|
tos = self.stack.pop()
|
||||||
|
# If there's exactly one child, return that child instead of
|
||||||
|
# creating a new node. We still create expr_stmt and
|
||||||
|
# file_input though, because a lot of Jedi depends on its
|
||||||
|
# logic.
|
||||||
|
if len(tos.nodes) == 1:
|
||||||
|
new_node = tos.nodes[0]
|
||||||
|
else:
|
||||||
|
new_node = self.convert_node(tos.dfa.from_rule, tos.nodes)
|
||||||
|
|
||||||
|
self.stack[-1].nodes.append(new_node)
|
||||||
|
|||||||
@@ -4,5 +4,7 @@
|
|||||||
# Modifications:
|
# Modifications:
|
||||||
# Copyright 2006 Google, Inc. All Rights Reserved.
|
# Copyright 2006 Google, Inc. All Rights Reserved.
|
||||||
# Licensed to PSF under a Contributor Agreement.
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
# Copyright 2014 David Halter. Integration into Jedi.
|
# Copyright 2014 David Halter and Contributors
|
||||||
# Modifications are dual-licensed: MIT and PSF.
|
# Modifications are dual-licensed: MIT and PSF.
|
||||||
|
|
||||||
|
from parso.pgen2.generator import generate_grammar
|
||||||
|
|||||||
358
parso/pgen2/generator.py
Normal file
358
parso/pgen2/generator.py
Normal file
@@ -0,0 +1,358 @@
|
|||||||
|
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||||
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
|
|
||||||
|
# Modifications:
|
||||||
|
# Copyright David Halter and Contributors
|
||||||
|
# Modifications are dual-licensed: MIT and PSF.
|
||||||
|
|
||||||
|
"""
|
||||||
|
This module defines the data structures used to represent a grammar.
|
||||||
|
|
||||||
|
Specifying grammars in pgen is possible with this grammar::
|
||||||
|
|
||||||
|
grammar: (NEWLINE | rule)* ENDMARKER
|
||||||
|
rule: NAME ':' rhs NEWLINE
|
||||||
|
rhs: items ('|' items)*
|
||||||
|
items: item+
|
||||||
|
item: '[' rhs ']' | atom ['+' | '*']
|
||||||
|
atom: '(' rhs ')' | NAME | STRING
|
||||||
|
|
||||||
|
This grammar is self-referencing.
|
||||||
|
|
||||||
|
This parser generator (pgen2) was created by Guido van Rossum and used for lib2to3.
|
||||||
|
Most of the code has been refactored to make it more Pythonic. Since this was a
|
||||||
|
"copy" of the CPython Parser parser "pgen", there was some work needed to make
|
||||||
|
it more readable. It should also be slightly faster than the original pgen2,
|
||||||
|
because we made some optimizations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from ast import literal_eval
|
||||||
|
|
||||||
|
from parso.pgen2.grammar_parser import GrammarParser, NFAState
|
||||||
|
|
||||||
|
|
||||||
|
class Grammar(object):
    """
    Container for the grammar tables used by the parsing engine.

    The parsing engine accesses the instance attributes directly. The
    only important parts for the parser are the DFAs and the transitions
    between them.
    """

    def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings):
        # Name of the rule parsing starts with.
        self.start_nonterminal = start_nonterminal
        # Keywords/operators that appear as strings in the grammar.
        self.reserved_syntax_strings = reserved_syntax_strings
        # Dict[str, List[DFAState]]
        self.nonterminal_to_dfas = rule_to_dfas
|
||||||
|
|
||||||
|
|
||||||
|
class DFAPlan(object):
    """
    Plans are used for the parser to create stack nodes and do the proper
    DFA state transitions.

    ``next_dfa`` is the DFA state to switch to; ``dfa_pushes`` is the list
    of DFA states for which new stack nodes have to be pushed.
    """
    def __init__(self, next_dfa, dfa_pushes=None):
        self.next_dfa = next_dfa
        # A literal ``[]`` default would be one list object shared by every
        # plan created without explicit pushes; create a fresh one instead.
        self.dfa_pushes = [] if dfa_pushes is None else dfa_pushes

    def __repr__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes)
|
||||||
|
|
||||||
|
|
||||||
|
class DFAState(object):
    """
    A vertex of the DFA graph of one grammar rule.

    ``arcs`` are the initial edges, keyed by terminal/nonterminal label.
    ``nonterminal_arcs`` and ``transitions`` start out empty and are filled
    in later by the grammar generator; ``transitions`` together with
    ``is_final`` is what the parser works with.
    """
    def __init__(self, from_rule, nfa_set, final):
        assert isinstance(nfa_set, set)
        assert isinstance(next(iter(nfa_set)), NFAState)
        assert isinstance(final, NFAState)
        self.from_rule = from_rule
        self.nfa_set = nfa_set
        # Map from terminals/nonterminals to DFAState.
        self.arcs = {}
        # Filled in an intermediary step with the arcs whose label is a
        # nonterminal (same structure as arcs, but without terminals).
        self.nonterminal_arcs = {}
        # Transitions are basically the only thing that the parser is using
        # (together with is_final). Everything else is purely here to
        # create a parser.
        self.transitions = {}  #: Dict[Union[TokenType, ReservedString], DFAPlan]
        # The state is final when it contains the rule's final NFA state.
        self.is_final = final in nfa_set

    def add_arc(self, next_, label):
        """Register ``next_`` as the target of the (new) arc ``label``."""
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next_, DFAState)
        self.arcs[label] = next_

    def unifystate(self, old, new):
        """Redirect every arc that points at ``old`` to ``new``."""
        redirected = [label for label, target in self.arcs.items() if target is old]
        for label in redirected:
            self.arcs[label] = new

    def __eq__(self, other):
        # Equality test -- ignore the nfa_set instance variable
        assert isinstance(other, DFAState)
        if self.is_final != other.is_final or len(self.arcs) != len(other.arcs):
            return False
        # Can't just compare self.arcs == other.arcs, because that would
        # invoke this method recursively, with cycles. Targets are therefore
        # compared by identity.
        other_arcs = other.arcs
        return all(
            target is other_arcs.get(label)
            for label, target in self.arcs.items()
        )

    __hash__ = None  # For Py3 compatibility.

    def __repr__(self):
        details = (self.__class__.__name__, self.from_rule, self.is_final)
        return '<%s: %s is_final=%s>' % details
|
||||||
|
|
||||||
|
|
||||||
|
class ReservedString(object):
    """
    A keyword or operator that the grammar mentions as a literal string
    (e.g. "if"), as opposed to a token type (e.g. NUMBER).
    """

    def __init__(self, value):
        # The literal text of the keyword/operator.
        self.value = value

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s(%s)' % (class_name, self.value)
|
||||||
|
|
||||||
|
|
||||||
|
def _simplify_dfas(dfas):
|
||||||
|
"""
|
||||||
|
This is not theoretically optimal, but works well enough.
|
||||||
|
Algorithm: repeatedly look for two states that have the same
|
||||||
|
set of arcs (same labels pointing to the same nodes) and
|
||||||
|
unify them, until things stop changing.
|
||||||
|
|
||||||
|
dfas is a list of DFAState instances
|
||||||
|
"""
|
||||||
|
changes = True
|
||||||
|
while changes:
|
||||||
|
changes = False
|
||||||
|
for i, state_i in enumerate(dfas):
|
||||||
|
for j in range(i + 1, len(dfas)):
|
||||||
|
state_j = dfas[j]
|
||||||
|
if state_i == state_j:
|
||||||
|
#print " unify", i, j
|
||||||
|
del dfas[j]
|
||||||
|
for state in dfas:
|
||||||
|
state.unifystate(state_j, state_i)
|
||||||
|
changes = True
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
def _make_dfas(start, finish):
    """
    Turn the NFA given by ``start``/``finish`` into a list of DFA states.

    Every DFA state corresponds to a *set* of NFA states (powerset
    construction). Arcs without a label (``None``) are followed while
    collecting such a set, so the resulting DFA arcs all carry a label.

    Returns the list of DFAState instances; the first one is the start.
    """
    assert isinstance(start, NFAState)
    assert isinstance(finish, NFAState)

    def addclosure(nfa_state, base_nfa_set):
        # Add nfa_state and everything reachable from it through unlabeled
        # arcs to base_nfa_set.
        assert isinstance(nfa_state, NFAState)
        if nfa_state not in base_nfa_set:
            base_nfa_set.add(nfa_state)
            for nfa_arc in nfa_state.arcs:
                if nfa_arc.nonterminal_or_string is None:
                    addclosure(nfa_arc.next, base_nfa_set)

    rule = start.from_rule
    initial_set = set()
    addclosure(start, initial_set)
    states = [DFAState(rule, initial_set, finish)]
    for state in states:  # NB states grows while we're iterating
        # Collect, per label, the set of NFA states reachable from this
        # DFA state.
        targets = {}
        for nfa_state in state.nfa_set:
            for nfa_arc in nfa_state.arcs:
                label = nfa_arc.nonterminal_or_string
                if label is None:
                    continue
                addclosure(nfa_arc.next, targets.setdefault(label, set()))

        # No None labels are left at this point; connect this state to the
        # DFA state of each target set, creating that state when no
        # existing one has the same NFA set.
        for label, nfa_set in targets.items():
            target_state = next(
                (known for known in states if known.nfa_set == nfa_set),
                None
            )
            if target_state is None:
                target_state = DFAState(rule, nfa_set, finish)
                states.append(target_state)

            state.add_arc(target_state, label)
    return states  # List of DFAState instances; first one is start
|
||||||
|
|
||||||
|
|
||||||
|
def _dump_nfa(start, finish):
|
||||||
|
print("Dump of NFA for", start.from_rule)
|
||||||
|
todo = [start]
|
||||||
|
for i, state in enumerate(todo):
|
||||||
|
print(" State", i, state is finish and "(final)" or "")
|
||||||
|
for label, next_ in state.arcs:
|
||||||
|
if next_ in todo:
|
||||||
|
j = todo.index(next_)
|
||||||
|
else:
|
||||||
|
j = len(todo)
|
||||||
|
todo.append(next_)
|
||||||
|
if label is None:
|
||||||
|
print(" -> %d" % j)
|
||||||
|
else:
|
||||||
|
print(" %s -> %d" % (label, j))
|
||||||
|
|
||||||
|
|
||||||
|
def _dump_dfas(dfas):
|
||||||
|
print("Dump of DFA for", dfas[0].from_rule)
|
||||||
|
for i, state in enumerate(dfas):
|
||||||
|
print(" State", i, state.is_final and "(final)" or "")
|
||||||
|
for nonterminal, next_ in state.arcs.items():
|
||||||
|
print(" %s -> %d" % (nonterminal, dfas.index(next_)))
|
||||||
|
|
||||||
|
|
||||||
|
def generate_grammar(bnf_grammar, token_namespace):
    """
    Build a Grammar from ``bnf_grammar``.

    ``bnf_grammar`` is a grammar in extended BNF (using * for repetition,
    + for at-least-once repetition, [] for optional parts, | for
    alternatives and () for grouping). It's not EBNF according to ISO/IEC
    14977. It's a dialect Python uses in its own parser.

    Named terminals are looked up as attributes of ``token_namespace``.
    The first rule of the grammar becomes the start nonterminal.
    """
    rule_to_dfas = {}
    start_nonterminal = None
    for nfa_start, nfa_end in GrammarParser(bnf_grammar).parse():
        rule_name = nfa_start.from_rule
        rule_dfas = _make_dfas(nfa_start, nfa_end)
        _simplify_dfas(rule_dfas)
        rule_to_dfas[rule_name] = rule_dfas

        if start_nonterminal is None:
            start_nonterminal = rule_name

    # Every rule is known now, so each arc can be classified: labels that
    # name a rule are nonterminal arcs, everything else is a terminal and
    # gets a direct transition.
    reserved_strings = {}
    for rule_dfas in rule_to_dfas.values():
        for dfa_state in rule_dfas:
            for label, next_dfa in dfa_state.arcs.items():
                if label in rule_to_dfas:
                    dfa_state.nonterminal_arcs[label] = next_dfa
                    continue
                transition = _make_transition(
                    token_namespace,
                    reserved_strings,
                    label
                )
                dfa_state.transitions[transition] = DFAPlan(next_dfa)

    _calculate_tree_traversal(rule_to_dfas)
    return Grammar(start_nonterminal, rule_to_dfas, reserved_strings)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_transition(token_namespace, reserved_syntax_strings, label):
|
||||||
|
"""
|
||||||
|
Creates a reserved string ("if", "for", "*", ...) or returns the token type
|
||||||
|
(NUMBER, STRING, ...) for a given grammar terminal.
|
||||||
|
"""
|
||||||
|
if label[0].isalpha():
|
||||||
|
# A named token (e.g. NAME, NUMBER, STRING)
|
||||||
|
return getattr(token_namespace, label)
|
||||||
|
else:
|
||||||
|
# Either a keyword or an operator
|
||||||
|
assert label[0] in ('"', "'"), label
|
||||||
|
assert not label.startswith('"""') and not label.startswith("'''")
|
||||||
|
value = literal_eval(label)
|
||||||
|
try:
|
||||||
|
return reserved_syntax_strings[value]
|
||||||
|
except KeyError:
|
||||||
|
r = reserved_syntax_strings[value] = ReservedString(value)
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
def _calculate_tree_traversal(nonterminal_to_dfas):
|
||||||
|
"""
|
||||||
|
By this point we know how dfas can move around within a stack node, but we
|
||||||
|
don't know how we can add a new stack node (nonterminal transitions).
|
||||||
|
"""
|
||||||
|
# Map from grammar rule (nonterminal) name to a set of tokens.
|
||||||
|
first_plans = {}
|
||||||
|
|
||||||
|
nonterminals = list(nonterminal_to_dfas.keys())
|
||||||
|
nonterminals.sort()
|
||||||
|
for nonterminal in nonterminals:
|
||||||
|
if nonterminal not in first_plans:
|
||||||
|
_calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)
|
||||||
|
|
||||||
|
# Now that we have calculated the first terminals, we are sure that
|
||||||
|
# there is no left recursion or ambiguities.
|
||||||
|
|
||||||
|
for dfas in nonterminal_to_dfas.values():
|
||||||
|
for dfa_state in dfas:
|
||||||
|
for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
|
||||||
|
for transition, pushes in first_plans[nonterminal].items():
|
||||||
|
dfa_state.transitions[transition] = DFAPlan(next_dfa, pushes)
|
||||||
|
|
||||||
|
|
||||||
|
def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
|
||||||
|
"""
|
||||||
|
Calculates the first plan in the first_plans dictionary for every given
|
||||||
|
nonterminal. This is going to be used to know when to create stack nodes.
|
||||||
|
"""
|
||||||
|
dfas = nonterminal_to_dfas[nonterminal]
|
||||||
|
new_first_plans = {}
|
||||||
|
first_plans[nonterminal] = None # dummy to detect left recursion
|
||||||
|
# We only need to check the first dfa. All the following ones are not
|
||||||
|
# interesting to find first terminals.
|
||||||
|
state = dfas[0]
|
||||||
|
for transition, next_ in state.transitions.items():
|
||||||
|
# It's a string. We have finally found a possible first token.
|
||||||
|
new_first_plans[transition] = [next_.next_dfa]
|
||||||
|
|
||||||
|
for nonterminal2, next_ in state.nonterminal_arcs.items():
|
||||||
|
# It's a nonterminal and we have either a left recursion issue
|
||||||
|
# in the grammar or we have to recurse.
|
||||||
|
try:
|
||||||
|
first_plans2 = first_plans[nonterminal2]
|
||||||
|
except KeyError:
|
||||||
|
first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2)
|
||||||
|
else:
|
||||||
|
if first_plans2 is None:
|
||||||
|
raise ValueError("left recursion for rule %r" % nonterminal)
|
||||||
|
|
||||||
|
for t, pushes in first_plans2.items():
|
||||||
|
check = new_first_plans.get(t)
|
||||||
|
if check is not None:
|
||||||
|
raise ValueError(
|
||||||
|
"Rule %s is ambiguous; %s is the"
|
||||||
|
" start of the rule %s as well as %s."
|
||||||
|
% (nonterminal, t, nonterminal2, check[-1].from_rule)
|
||||||
|
)
|
||||||
|
new_first_plans[t] = [next_] + pushes
|
||||||
|
|
||||||
|
first_plans[nonterminal] = new_first_plans
|
||||||
|
return new_first_plans
|
||||||
@@ -1,128 +0,0 @@
|
|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
|
||||||
# Licensed to PSF under a Contributor Agreement.
|
|
||||||
|
|
||||||
# Modifications:
|
|
||||||
# Copyright 2014 David Halter. Integration into Jedi.
|
|
||||||
# Modifications are dual-licensed: MIT and PSF.
|
|
||||||
|
|
||||||
"""This module defines the data structures used to represent a grammar.
|
|
||||||
|
|
||||||
These are a bit arcane because they are derived from the data
|
|
||||||
structures used by Python's 'pgen' parser generator.
|
|
||||||
|
|
||||||
There's also a table here mapping operators to their names in the
|
|
||||||
token module; the Python tokenize module reports all operators as the
|
|
||||||
fallback token code OP, but the parser needs the actual token code.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
import cPickle as pickle
|
|
||||||
except:
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
|
|
||||||
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py. The parsing engine
    accesses the instance variables directly. The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses. The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass). The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers. Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number. The DFA number is the index into
                     this list. (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords. The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """

    def __init__(self, bnf_text):
        # NOTE: bnf_text is accepted but not used here; all tables start
        # out empty.
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.label2symbol = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        # NOTE(security): unpickling can execute arbitrary code; only load
        # files that come from a trusted source.
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """
        Copy the grammar.

        The table containers (dicts and lists) are copied one level deep;
        their entries are shared with the original.
        """
        # __init__ requires bnf_text (and ignores it), so it has to be
        # passed; calling the class without arguments raises TypeError.
        new = self.__class__(None)
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label", "label2symbol"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)
|
|
||||||
156
parso/pgen2/grammar_parser.py
Normal file
156
parso/pgen2/grammar_parser.py
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
||||||
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
|
|
||||||
|
# Modifications:
|
||||||
|
# Copyright David Halter and Contributors
|
||||||
|
# Modifications are dual-licensed: MIT and PSF.
|
||||||
|
|
||||||
|
from parso.python.tokenize import tokenize
|
||||||
|
from parso.utils import parse_version_string
|
||||||
|
from parso.python.token import PythonTokenTypes
|
||||||
|
|
||||||
|
|
||||||
|
class GrammarParser():
    """
    The parser for Python grammar files.

    The grammar text is tokenized once in the constructor; ``parse()``
    then yields one ``(start, end)`` pair of NFA states per rule.
    ``self.type``, ``self.value`` and ``self.begin`` always describe the
    current lookahead token.
    """
    def __init__(self, bnf_grammar):
        self._bnf_grammar = bnf_grammar
        self.generator = tokenize(
            bnf_grammar,
            version_info=parse_version_string('3.6')
        )
        self._gettoken()  # Initialize lookahead

    def parse(self):
        """Yield ``(start, end)`` NFA state pairs, one per grammar rule."""
        # grammar: (NEWLINE | rule)* ENDMARKER
        while self.type != PythonTokenTypes.ENDMARKER:
            while self.type == PythonTokenTypes.NEWLINE:
                self._gettoken()

            # rule: NAME ':' rhs NEWLINE
            self._current_rule_name = self._expect(PythonTokenTypes.NAME)
            self._expect(PythonTokenTypes.OP, ':')

            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.NEWLINE)

            yield a, z

    def _parse_rhs(self):
        # rhs: items ('|' items)*
        a, z = self._parse_items()
        if self.value != "|":
            return a, z
        else:
            # Several alternatives: wrap them in a common start (aa) and a
            # common end (zz) state.
            aa = NFAState(self._current_rule_name)
            zz = NFAState(self._current_rule_name)
            while True:
                # Add the possibility to go into the state of a and come back
                # to finish.
                aa.add_arc(a)
                z.add_arc(zz)
                if self.value != "|":
                    break

                self._gettoken()
                a, z = self._parse_items()
            return aa, zz

    def _parse_items(self):
        # items: item+
        a, b = self._parse_item()
        while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \
                or self.value in ('(', '['):
            c, d = self._parse_item()
            # Need to end on the next item.
            b.add_arc(c)
            b = d
        return a, b

    def _parse_item(self):
        # item: '[' rhs ']' | atom ['+' | '*']
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ']')
            # Make it also possible that there is no token and change the
            # state.
            a.add_arc(z)
            return a, z
        else:
            a, z = self._parse_atom()
            value = self.value
            if value not in ("+", "*"):
                return a, z
            self._gettoken()
            # Make it clear that we can go back to the old state and repeat.
            z.add_arc(a)
            if value == "+":
                return a, z
            else:
                # The end state is the same as the beginning, nothing must
                # change.
                return a, a

    def _parse_atom(self):
        # atom: '(' rhs ')' | NAME | STRING
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(PythonTokenTypes.OP, ')')
            return a, z
        elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING):
            a = NFAState(self._current_rule_name)
            z = NFAState(self._current_rule_name)
            # Make it clear that the state transition requires that value.
            a.add_arc(z, self.value)
            self._gettoken()
            return a, z
        else:
            self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                              self.type, self.value)

    def _expect(self, type_, value=None):
        """
        Consume the current token and return its value.

        Raises SyntaxError if the token does not have the type ``type_``
        or, when ``value`` is given, does not have that value.
        """
        if self.type != type_:
            self._raise_error("expected %s, got %s [%s]",
                              type_, self.type, self.value)
        if value is not None and self.value != value:
            self._raise_error("expected %s, got %s", value, self.value)
        value = self.value
        self._gettoken()
        return value

    def _gettoken(self):
        """Advance the lookahead to the next token of the generator."""
        # The fourth element (the prefix) is not needed here.
        self.type, self.value, self.begin, _ = next(self.generator)

    def _raise_error(self, msg, *args):
        """Raise a SyntaxError that points at the current token."""
        if args:
            try:
                msg = msg % args
            except (TypeError, ValueError):
                # The arguments don't fit the format string; append them
                # instead of losing the message. Only formatting errors
                # are handled, so e.g. KeyboardInterrupt is not swallowed.
                msg = " ".join([msg] + list(map(str, args)))
        line = self._bnf_grammar.splitlines()[self.begin[0] - 1]
        raise SyntaxError(msg, ('<grammar>', self.begin[0],
                                self.begin[1], line))
|
||||||
|
|
||||||
|
|
||||||
|
class NFAArc(object):
    """
    An edge of the NFA: the target state plus the label needed to take it
    (``None`` for arcs added without a label).
    """
    def __init__(self, next_, nonterminal_or_string):
        self.nonterminal_or_string = nonterminal_or_string
        self.next = next_
|
||||||
|
|
||||||
|
|
||||||
|
class NFAState(object):
    """A state of the NFA built for the grammar rule ``from_rule``."""

    def __init__(self, from_rule):
        self.from_rule = from_rule
        # List[NFAArc]: the outgoing arcs, in the order they were added.
        self.arcs = []

    def add_arc(self, next_, nonterminal_or_string=None):
        """
        Append an arc to ``next_``. Without ``nonterminal_or_string`` the
        arc carries no label.
        """
        assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str)
        assert isinstance(next_, NFAState)
        arc = NFAArc(next_, nonterminal_or_string)
        self.arcs.append(arc)

    def __repr__(self):
        class_name = self.__class__.__name__
        return '<%s: from %s>' % (class_name, self.from_rule)
|
||||||
@@ -1,223 +0,0 @@
|
|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
|
||||||
# Licensed to PSF under a Contributor Agreement.
|
|
||||||
|
|
||||||
# Modifications:
|
|
||||||
# Copyright 2014 David Halter. Integration into Jedi.
|
|
||||||
# Modifications are dual-licensed: MIT and PSF.
|
|
||||||
|
|
||||||
"""
|
|
||||||
Parser engine for the grammar tables generated by pgen.
|
|
||||||
|
|
||||||
The grammar table must be loaded first.
|
|
||||||
|
|
||||||
See Parser/parser.c in the Python distribution for additional info on
|
|
||||||
how this parsing engine works.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from parso.python import tokenize
|
|
||||||
|
|
||||||
|
|
||||||
class InternalParseError(Exception):
    """
    Exception to signal the parser is stuck and error recovery didn't help.
    Basically this shouldn't happen. It's a sign that something is really
    wrong.

    The message, token type, value and start position are kept as
    attributes; the exception text shows the token type by its name.
    """

    def __init__(self, msg, type, value, start_pos):
        type_name = tokenize.tok_name[type]
        description = "%s: type=%r, value=%r, start_pos=%r" % (
            msg, type_name, value, start_pos
        )
        Exception.__init__(self, description)
        self.msg = msg
        self.type = type
        self.value = value
        self.start_pos = start_pos
|
|
||||||
|
|
||||||
|
|
||||||
class Stack(list):
    """List of stack entries with a shortcut to the top entry's children."""

    def get_tos_nodes(self):
        """Return element ``[2][1]`` of the last (top of stack) entry."""
        top_entry = self[-1]
        node = top_entry[2]
        return node[1]
|
|
||||||
|
|
||||||
|
|
||||||
def token_to_ilabel(grammar, type_, value):
    """
    Map a token to its label in ``grammar``.

    For NAME tokens ``grammar.keywords`` is consulted first (reserved
    words); otherwise, or if the value is not a keyword, the label is
    looked up by token type in ``grammar.tokens``. Returns None when the
    token type has no label.
    """
    if type_ == tokenize.NAME:
        # Check for reserved words (keywords)
        keywords = grammar.keywords
        if value in keywords:
            return keywords[value]

    tokens = grammar.tokens
    if type_ in tokens:
        return tokens[type_]
    return None
|
|
||||||
|
|
||||||
|
|
||||||
class PgenParser(object):
|
|
||||||
"""Parser engine.
|
|
||||||
|
|
||||||
The proper usage sequence is:
|
|
||||||
|
|
||||||
p = Parser(grammar, [converter]) # create instance
|
|
||||||
p.setup([start]) # prepare for parsing
|
|
||||||
<for each input token>:
|
|
||||||
if p.add_token(...): # parse a token
|
|
||||||
break
|
|
||||||
root = p.rootnode # root of abstract syntax tree
|
|
||||||
|
|
||||||
A Parser instance may be reused by calling setup() repeatedly.
|
|
||||||
|
|
||||||
A Parser instance contains state pertaining to the current token
|
|
||||||
sequence, and should not be used concurrently by different threads
|
|
||||||
to parse separate token sequences.
|
|
||||||
|
|
||||||
See driver.py for how to get input tokens by tokenizing a file or
|
|
||||||
string.
|
|
||||||
|
|
||||||
Parsing is complete when add_token() returns True; the root of the
|
|
||||||
abstract syntax tree can then be retrieved from the rootnode
|
|
||||||
instance variable. When a syntax error occurs, error_recovery()
|
|
||||||
is called. There is no error recovery; the parser cannot be used
|
|
||||||
after a syntax error was reported (but it can be reinitialized by
|
|
||||||
calling setup()).
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
The grammar argument is a grammar.Grammar instance; see the
|
|
||||||
grammar module for more information.
|
|
||||||
|
|
||||||
The parser is not ready yet for parsing; you must call the
|
|
||||||
setup() method to get it started.
|
|
||||||
|
|
||||||
The optional convert argument is a function mapping concrete
|
|
||||||
syntax tree nodes to abstract syntax tree nodes. If not
|
|
||||||
given, no conversion is done and the syntax tree produced is
|
|
||||||
the concrete syntax tree. If given, it must be a function of
|
|
||||||
two arguments, the first being the grammar (a grammar.Grammar
|
|
||||||
instance), and the second being the concrete syntax tree node
|
|
||||||
to be converted. The syntax tree is converted from the bottom
|
|
||||||
up.
|
|
||||||
|
|
||||||
A concrete syntax tree node is a (type, nodes) tuple, where
|
|
||||||
type is the node type (a token or symbol number) and nodes
|
|
||||||
is a list of children for symbols, and None for tokens.
|
|
||||||
|
|
||||||
An abstract syntax tree node may be anything; this is entirely
|
|
||||||
up to the converter function.
|
|
||||||
|
|
||||||
"""
|
|
||||||
self.grammar = grammar
|
|
||||||
self.convert_node = convert_node
|
|
||||||
self.convert_leaf = convert_leaf
|
|
||||||
|
|
||||||
# Each stack entry is a tuple: (dfa, state, node).
|
|
||||||
# A node is a tuple: (type, children),
|
|
||||||
# where children is a list of nodes or None
|
|
||||||
newnode = (start, [])
|
|
||||||
stackentry = (self.grammar.dfas[start], 0, newnode)
|
|
||||||
self.stack = Stack([stackentry])
|
|
||||||
self.rootnode = None
|
|
||||||
self.error_recovery = error_recovery
|
|
||||||
|
|
||||||
def parse(self, tokens):
    """Feed all ``tokens`` to ``add_token`` and return the root node.

    ``tokens`` is an iterable of ``(type, value, start_pos, prefix)``
    tuples.  Iteration stops as soon as ``add_token`` returns a true
    value.

    Raises ``InternalParseError`` ("incomplete input") when the tokens
    run out while the stack still has entries.
    """
    # Pre-bind the names used in the error below.  Without this an empty
    # token stream would fail with UnboundLocalError instead of reporting
    # the incomplete input.
    type_ = value = start_pos = None
    for type_, value, start_pos, prefix in tokens:
        if self.add_token(type_, value, start_pos, prefix):
            break
    else:
        # We never broke out -- EOF is too soon -- Unfinished statement.
        # However, the error recovery might have added the token again; if
        # the stack is empty, we're fine.
        if self.stack:
            raise InternalParseError("incomplete input", type_, value, start_pos)
    return self.rootnode
|
|
||||||
|
|
||||||
def add_token(self, type_, value, start_pos, prefix):
    """Add a token; return True if this is the end of the program.

    Returns ``False`` once the token has been shifted while the stack is
    still non-empty, and ``None`` when the token was handed over to
    ``error_recovery``.  Raises ``InternalParseError`` ("too much input")
    if the stack runs empty while the token is still unconsumed.
    """
    grammar = self.grammar
    ilabel = token_to_ilabel(grammar, type_, value)
    labels = grammar.labels

    # Loop until the token is shifted; may raise exceptions
    while True:
        dfa, state, _ = self.stack[-1]
        states, _first = dfa
        arcs = states[state]
        # Look for an arc carrying this label.
        for label_index, next_state in arcs:
            label_type, _ = labels[label_index]
            if label_index == ilabel:
                # A directly matching label has to be a token, not a symbol.
                assert label_type < 256
                # Shift a token; we're done with it
                self._shift(type_, value, next_state, prefix, start_pos)
                # Pop while we are in an accept-only state
                state = next_state
                while states[state] == [(0, state)]:
                    self._pop()
                    if not self.stack:
                        # Done parsing!
                        return True
                    dfa, state, _ = self.stack[-1]
                    states, _first = dfa
                # Done with this token
                return False
            if label_type >= 256:
                # The arc is a symbol: descend if the token is in its first set.
                sub_dfa = grammar.dfas[label_type]
                _, sub_first = sub_dfa
                if ilabel in sub_first:
                    # Push a symbol
                    self._push(label_type, sub_dfa, next_state)
                    break  # To continue the outer while loop
        else:
            if (0, state) not in arcs:
                # Neither a matching arc nor an accepting state.
                self.error_recovery(grammar, self.stack, arcs, type_,
                                    value, start_pos, prefix, self.add_token)
                break
            # An accepting state, pop it and try something else
            self._pop()
            if not self.stack:
                # Done parsing, but another token is input
                raise InternalParseError("too much input", type_, value, start_pos)
|
|
||||||
|
|
||||||
def _shift(self, type_, value, newstate, prefix, start_pos):
|
|
||||||
"""Shift a token. (Internal)"""
|
|
||||||
dfa, state, node = self.stack[-1]
|
|
||||||
newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos)
|
|
||||||
node[-1].append(newnode)
|
|
||||||
self.stack[-1] = (dfa, newstate, node)
|
|
||||||
|
|
||||||
def _push(self, type_, newdfa, newstate):
|
|
||||||
"""Push a nonterminal. (Internal)"""
|
|
||||||
dfa, state, node = self.stack[-1]
|
|
||||||
newnode = (type_, [])
|
|
||||||
self.stack[-1] = (dfa, newstate, node)
|
|
||||||
self.stack.append((newdfa, 0, newnode))
|
|
||||||
|
|
||||||
def _pop(self):
|
|
||||||
"""Pop a nonterminal. (Internal)"""
|
|
||||||
popdfa, popstate, (type_, children) = self.stack.pop()
|
|
||||||
# If there's exactly one child, return that child instead of creating a
|
|
||||||
# new node. We still create expr_stmt and file_input though, because a
|
|
||||||
# lot of Jedi depends on its logic.
|
|
||||||
if len(children) == 1:
|
|
||||||
newnode = children[0]
|
|
||||||
else:
|
|
||||||
newnode = self.convert_node(self.grammar, type_, children)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Equal to:
|
|
||||||
# dfa, state, node = self.stack[-1]
|
|
||||||
# symbol, children = node
|
|
||||||
self.stack[-1][2][1].append(newnode)
|
|
||||||
except IndexError:
|
|
||||||
# Stack is empty, set the rootnode.
|
|
||||||
self.rootnode = newnode
|
|
||||||
@@ -1,399 +0,0 @@
|
|||||||
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
|
||||||
# Licensed to PSF under a Contributor Agreement.
|
|
||||||
|
|
||||||
# Modifications:
|
|
||||||
# Copyright 2014 David Halter. Integration into Jedi.
|
|
||||||
# Modifications are dual-licensed: MIT and PSF.
|
|
||||||
|
|
||||||
from parso.pgen2 import grammar
|
|
||||||
from parso.python import token
|
|
||||||
from parso.python import tokenize
|
|
||||||
from parso.utils import parse_version_string
|
|
||||||
|
|
||||||
|
|
||||||
class ParserGenerator(object):
    """Turn a BNF grammar text into DFAs and a ``grammar.Grammar`` object.

    Construction tokenizes ``bnf_text``, parses every rule into an NFA,
    converts each NFA into a simplified DFA and computes the first set of
    every rule.  ``make_grammar`` then numbers symbols and labels and
    fills a ``grammar.Grammar`` instance.

    ``token_namespace`` is used to resolve named tokens (via attribute
    lookup) and operators (via ``generate_token_id``).
    """

    def __init__(self, bnf_text, token_namespace):
        self._bnf_text = bnf_text
        self.generator = tokenize.tokenize(
            bnf_text,
            version_info=parse_version_string('3.6')
        )
        self._gettoken()  # Initialize lookahead
        self.dfas, self.startsymbol = self._parse()
        self.first = {}  # map from symbol name to set of tokens
        self._addfirstsets()
        self._token_namespace = token_namespace

    def make_grammar(self):
        """Build and return the ``grammar.Grammar`` for the parsed rules."""
        c = grammar.Grammar(self._bnf_text)
        # Symbols are numbered from 256 upwards in sorted order, except
        # that the start symbol always comes first.
        names = sorted(self.dfas)
        names.remove(self.startsymbol)
        names.insert(0, self.startsymbol)
        for name in names:
            i = 256 + len(c.symbol2number)
            c.symbol2number[name] = i
            c.number2symbol[i] = name
        for name in names:
            dfa = self.dfas[name]
            states = []
            for state in dfa:
                arcs = []
                for label, target in state.arcs.items():
                    arcs.append((self._make_label(c, label), dfa.index(target)))
                if state.isfinal:
                    # Final states get an arc with label 0 to themselves.
                    arcs.append((0, dfa.index(state)))
                states.append(arcs)
            c.states.append(states)
            c.dfas[c.symbol2number[name]] = (states, self._make_first(c, name))
        c.start = c.symbol2number[self.startsymbol]
        return c

    def _make_first(self, c, name):
        """Return the first set of ``name`` keyed by label index."""
        first = {}
        for label in self.first[name]:
            # Two raw labels may map to the same ilabel (the original code
            # notes this happened for <> and !=), so duplicates are fine.
            first[self._make_label(c, label)] = 1
        return first

    def _make_label(self, c, label):
        """Return the index of ``label`` in ``c.labels``, adding it if new."""
        # XXX Maybe this should be a method on a subclass of converter?
        import ast  # local import: only needed to decode quoted labels

        ilabel = len(c.labels)
        if label[0].isalpha():
            # Either a symbol name or a named token
            if label in c.symbol2number:
                # A symbol name (a non-terminal)
                if label in c.symbol2label:
                    return c.symbol2label[label]
                c.labels.append((c.symbol2number[label], None))
                c.symbol2label[label] = ilabel
                c.label2symbol[ilabel] = label
                return ilabel
            # A named token (NAME, NUMBER, STRING)
            itoken = getattr(self._token_namespace, label, None)
            assert isinstance(itoken, int), label
            if itoken in c.tokens:
                return c.tokens[itoken]
            c.labels.append((itoken, None))
            c.tokens[itoken] = ilabel
            return ilabel

        # Either a keyword or an operator
        assert label[0] in ('"', "'"), label
        # The label is a quoted string literal taken from the grammar text;
        # literal_eval decodes it without executing arbitrary expressions.
        value = ast.literal_eval(label)
        if value[0].isalpha():
            # A keyword
            if value in c.keywords:
                return c.keywords[value]
            # TODO this might be an issue?! Using token.NAME here?
            c.labels.append((token.NAME, value))
            c.keywords[value] = ilabel
            return ilabel
        # An operator (any non-numeric token)
        itoken = self._token_namespace.generate_token_id(value)
        if itoken in c.tokens:
            return c.tokens[itoken]
        c.labels.append((itoken, None))
        c.tokens[itoken] = ilabel
        return ilabel

    def _addfirstsets(self):
        """Compute the first set of every rule that does not have one yet."""
        for name in sorted(self.dfas):
            if name not in self.first:
                self._calcfirst(name)

    def _calcfirst(self, name):
        """Compute ``self.first[name]``, recursing into referenced rules.

        Raises ``ValueError`` for left recursion and for rules whose
        alternatives share a first token.
        """
        dfa = self.dfas[name]
        self.first[name] = None  # dummy to detect left recursion
        state = dfa[0]
        totalset = {}
        overlapcheck = {}
        for label in state.arcs:
            if label in self.dfas:
                if label in self.first:
                    fset = self.first[label]
                    if fset is None:
                        raise ValueError("recursion for rule %r" % name)
                else:
                    self._calcfirst(label)
                    fset = self.first[label]
                totalset.update(fset)
                overlapcheck[label] = fset
            else:
                totalset[label] = 1
                overlapcheck[label] = {label: 1}
        # Every token may start at most one alternative of the rule.
        inverse = {}
        for label, itsfirst in overlapcheck.items():
            for symbol in itsfirst:
                if symbol in inverse:
                    raise ValueError("rule %s is ambiguous; %s is in the"
                                     " first sets of %s as well as %s" %
                                     (name, symbol, label, inverse[symbol]))
                inverse[symbol] = label
        self.first[name] = totalset

    def _parse(self):
        """Parse the whole grammar; return ``(dfas, startsymbol)``."""
        dfas = {}
        startsymbol = None
        # MSTART: (NEWLINE | RULE)* ENDMARKER
        while self.type != token.ENDMARKER:
            while self.type == token.NEWLINE:
                self._gettoken()
            # RULE: NAME ':' RHS NEWLINE
            name = self._expect(token.NAME)
            self._expect(token.COLON)
            a, z = self._parse_rhs()
            self._expect(token.NEWLINE)
            dfa = self._make_dfa(a, z)
            self._simplify_dfa(dfa)
            dfas[name] = dfa
            # The first rule in the text is the start symbol.
            if startsymbol is None:
                startsymbol = name
        return dfas, startsymbol

    def _make_dfa(self, start, finish):
        """Convert the NFA between ``start`` and ``finish`` into a DFA."""
        # To turn an NFA into a DFA, we define the states of the DFA
        # to correspond to *sets* of states of the NFA.  Then do some
        # state reduction.  Let's represent sets as dicts with 1 for
        # values.
        assert isinstance(start, NFAState)
        assert isinstance(finish, NFAState)

        def closure(state):
            base = {}
            addclosure(state, base)
            return base

        def addclosure(state, base):
            assert isinstance(state, NFAState)
            if state in base:
                return
            base[state] = 1
            for label, target in state.arcs:
                # Unlabelled arcs are followed without consuming input.
                if label is None:
                    addclosure(target, base)

        states = [DFAState(closure(start), finish)]
        for state in states:  # NB states grows while we're iterating
            arcs = {}
            for nfastate in state.nfaset:
                for label, target in nfastate.arcs:
                    if label is not None:
                        addclosure(target, arcs.setdefault(label, {}))
            for label, nfaset in arcs.items():
                for st in states:
                    if st.nfaset == nfaset:
                        break
                else:
                    st = DFAState(nfaset, finish)
                    states.append(st)
                state.addarc(st, label)
        return states  # List of DFAState instances; first one is start

    def _dump_nfa(self, name, start, finish):
        """Print the NFA reachable from ``start`` (debugging helper)."""
        print("Dump of NFA for", name)
        todo = [start]
        for i, state in enumerate(todo):  # todo grows while iterating
            print(" State", i, "(final)" if state is finish else "")
            for label, target in state.arcs:
                if target in todo:
                    j = todo.index(target)
                else:
                    j = len(todo)
                    todo.append(target)
                if label is None:
                    print(" -> %d" % j)
                else:
                    print(" %s -> %d" % (label, j))

    def _dump_dfa(self, name, dfa):
        """Print all states and arcs of ``dfa`` (debugging helper)."""
        print("Dump of DFA for", name)
        for i, state in enumerate(dfa):
            print(" State", i, "(final)" if state.isfinal else "")
            for label, target in state.arcs.items():
                print(" %s -> %d" % (label, dfa.index(target)))

    def _simplify_dfa(self, dfa):
        """Merge equal states of ``dfa`` in place."""
        # This is not theoretically optimal, but works well enough.
        # Algorithm: repeatedly look for two states that have the same
        # set of arcs (same labels pointing to the same nodes) and
        # unify them, until things stop changing.

        # dfa is a list of DFAState instances
        changes = True
        while changes:
            changes = False
            for i, state_i in enumerate(dfa):
                for j in range(i + 1, len(dfa)):
                    state_j = dfa[j]
                    if state_i == state_j:
                        del dfa[j]
                        for state in dfa:
                            state.unifystate(state_j, state_i)
                        changes = True
                        break

    def _parse_rhs(self):
        """Parse ``RHS: ALT ('|' ALT)*``; return its (start, end) NFA states."""
        a, z = self._parse_alt()
        if self.value != "|":
            return a, z
        # Several alternatives: fan out from a new start state and join
        # all of them in a new end state.
        aa = NFAState()
        zz = NFAState()
        aa.addarc(a)
        z.addarc(zz)
        while self.value == "|":
            self._gettoken()
            a, z = self._parse_alt()
            aa.addarc(a)
            z.addarc(zz)
        return aa, zz

    def _parse_alt(self):
        """Parse ``ALT: ITEM+``; return its (start, end) NFA states."""
        a, b = self._parse_item()
        while (self.value in ("(", "[") or
               self.type in (token.NAME, token.STRING)):
            c, d = self._parse_item()
            b.addarc(c)
            b = d
        return a, b

    def _parse_item(self):
        """Parse ``ITEM: '[' RHS ']' | ATOM ['+' | '*']``."""
        if self.value == "[":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(token.RSQB)
            # Optional part: allow skipping straight to the end.
            a.addarc(z)
            return a, z
        a, z = self._parse_atom()
        value = self.value
        if value not in ("+", "*"):
            return a, z
        self._gettoken()
        # Repetition: loop from the end back to the start.
        z.addarc(a)
        if value == "+":
            return a, z
        # '*' may match zero times, so start and end coincide.
        return a, a

    def _parse_atom(self):
        """Parse ``ATOM: '(' RHS ')' | NAME | STRING``."""
        if self.value == "(":
            self._gettoken()
            a, z = self._parse_rhs()
            self._expect(token.RPAR)
            return a, z
        elif self.type in (token.NAME, token.STRING):
            a = NFAState()
            z = NFAState()
            a.addarc(z, self.value)
            self._gettoken()
            return a, z
        else:
            self._raise_error("expected (...) or NAME or STRING, got %s/%s",
                              self.type, self.value)

    def _expect(self, type):
        """Consume a token of ``type`` and return its value, else raise."""
        if self.type != type:
            self._raise_error("expected %s, got %s(%s)",
                              type, self.type, self.value)
        value = self.value
        self._gettoken()
        return value

    def _gettoken(self):
        """Advance the lookahead, skipping COMMENT and NL tokens."""
        tup = next(self.generator)
        while tup[0] in (token.COMMENT, token.NL):
            tup = next(self.generator)
        self.type, self.value, self.begin, _prefix = tup

    def _raise_error(self, msg, *args):
        """Raise ``SyntaxError`` for the current token position.

        ``msg`` is %-formatted with ``args``; if that fails the arguments
        are appended to the message instead.
        """
        if args:
            try:
                msg = msg % args
            except (TypeError, ValueError):
                msg = " ".join([msg] + list(map(str, args)))
        lines = self._bnf_text.splitlines()
        lineno = self.begin[0]
        # The position may lie past the last text line (e.g. an error at the
        # end marker); report an empty line then instead of failing with
        # IndexError and hiding the real error.
        line = lines[lineno - 1] if 0 < lineno <= len(lines) else ''
        raise SyntaxError(msg, ('<grammar>', lineno, self.begin[1], line))
|
|
||||||
|
|
||||||
|
|
||||||
class NFAState(object):
    """A state of the NFA built for one grammar rule.

    ``arcs`` is a list of ``(label, NFAState)`` pairs; a label of ``None``
    marks an arc that is followed without consuming a token.
    """

    def __init__(self):
        self.arcs = []  # list of (label, NFAState) pairs

    def addarc(self, next, label=None):
        """Append an arc to ``next``, optionally labelled with a string."""
        if label is not None:
            assert isinstance(label, str)
        assert isinstance(next, NFAState)
        arc = (label, next)
        self.arcs.append(arc)
|
|
||||||
|
|
||||||
|
|
||||||
class DFAState(object):
    """A DFA state, representing a set of NFA states.

    ``nfaset`` is the dict of NFA states this state stands for, ``isfinal``
    tells whether the final NFA state is among them and ``arcs`` maps a
    label to the DFA state it leads to.
    """

    def __init__(self, nfaset, final):
        assert isinstance(nfaset, dict)
        assert isinstance(next(iter(nfaset)), NFAState)
        assert isinstance(final, NFAState)
        self.nfaset = nfaset
        self.isfinal = final in nfaset
        self.arcs = {}  # map from label to DFAState

    def addarc(self, next, label):
        """Add an arc labelled ``label`` to ``next``; labels must be unique."""
        assert isinstance(label, str)
        assert label not in self.arcs
        assert isinstance(next, DFAState)
        self.arcs[label] = next

    def unifystate(self, old, new):
        """Redirect every arc that points at ``old`` so it points at ``new``."""
        # Only values of existing keys are replaced, so the dict keeps its
        # size while it is being iterated.
        for label, target in self.arcs.items():
            if target is old:
                self.arcs[label] = new

    def __eq__(self, other):
        """Compare finality and arcs; ``nfaset`` is ignored on purpose."""
        # Let Python fall back to its default handling for foreign types
        # instead of failing an assertion on e.g. ``state == None``.
        if not isinstance(other, DFAState):
            return NotImplemented
        if self.isfinal != other.isfinal:
            return False
        # Can't just return self.arcs == other.arcs, because that
        # would invoke this method recursively, with cycles...
        if len(self.arcs) != len(other.arcs):
            return False
        for label, target in self.arcs.items():
            # Targets are compared by identity to avoid that recursion.
            if target is not other.arcs.get(label):
                return False
        return True

    __hash__ = None  # For Py3 compatibility.
|
|
||||||
|
|
||||||
|
|
||||||
def generate_grammar(bnf_text, token_namespace):
    """
    Build a grammar object from ``bnf_text`` by running a
    ``ParserGenerator`` over it and returning its ``make_grammar()`` result.

    ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for
    at-least-once repetition, [] for optional parts, | for alternatives and ()
    for grouping).

    It's not EBNF according to ISO/IEC 14977. It's a dialect Python uses in its
    own parser.
    """
    return ParserGenerator(bnf_text, token_namespace).make_grammar()
|
|
||||||
@@ -13,10 +13,81 @@ import logging
|
|||||||
from parso.utils import split_lines
|
from parso.utils import split_lines
|
||||||
from parso.python.parser import Parser
|
from parso.python.parser import Parser
|
||||||
from parso.python.tree import EndMarker
|
from parso.python.tree import EndMarker
|
||||||
from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
|
from parso.python.tokenize import PythonToken
|
||||||
ENDMARKER, INDENT, DEDENT)
|
from parso.python.token import PythonTokenTypes
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
DEBUG_DIFF_PARSER = False
|
||||||
|
|
||||||
|
_INDENTATION_TOKENS = 'INDENT', 'ERROR_DEDENT', 'DEDENT'
|
||||||
|
|
||||||
|
|
||||||
|
def _get_previous_leaf_if_indentation(leaf):
|
||||||
|
while leaf and leaf.type == 'error_leaf' \
|
||||||
|
and leaf.token_type in _INDENTATION_TOKENS:
|
||||||
|
leaf = leaf.get_previous_leaf()
|
||||||
|
return leaf
|
||||||
|
|
||||||
|
|
||||||
|
def _get_next_leaf_if_indentation(leaf):
|
||||||
|
while leaf and leaf.type == 'error_leaf' \
|
||||||
|
and leaf.token_type in _INDENTATION_TOKENS:
|
||||||
|
leaf = leaf.get_previous_leaf()
|
||||||
|
return leaf
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_valid_graph(node):
|
||||||
|
"""
|
||||||
|
Checks if the parent/children relationship is correct.
|
||||||
|
|
||||||
|
This is a check that only runs during debugging/testing.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
children = node.children
|
||||||
|
except AttributeError:
|
||||||
|
# Ignore INDENT is necessary, because indent/dedent tokens don't
|
||||||
|
# contain value/prefix and are just around, because of the tokenizer.
|
||||||
|
if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
|
||||||
|
assert not node.value
|
||||||
|
assert not node.prefix
|
||||||
|
return
|
||||||
|
|
||||||
|
# Calculate the content between two start positions.
|
||||||
|
previous_leaf = _get_previous_leaf_if_indentation(node.get_previous_leaf())
|
||||||
|
if previous_leaf is None:
|
||||||
|
content = node.prefix
|
||||||
|
previous_start_pos = 1, 0
|
||||||
|
else:
|
||||||
|
assert previous_leaf.end_pos <= node.start_pos, \
|
||||||
|
(previous_leaf, node)
|
||||||
|
|
||||||
|
content = previous_leaf.value + node.prefix
|
||||||
|
previous_start_pos = previous_leaf.start_pos
|
||||||
|
|
||||||
|
if '\n' in content or '\r' in content:
|
||||||
|
splitted = split_lines(content)
|
||||||
|
line = previous_start_pos[0] + len(splitted) - 1
|
||||||
|
actual = line, len(splitted[-1])
|
||||||
|
else:
|
||||||
|
actual = previous_start_pos[0], previous_start_pos[1] + len(content)
|
||||||
|
|
||||||
|
assert node.start_pos == actual, (node.start_pos, actual)
|
||||||
|
else:
|
||||||
|
for child in children:
|
||||||
|
assert child.parent == node, (node, child)
|
||||||
|
_assert_valid_graph(child)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_debug_error_message(module, old_lines, new_lines):
    """Build the bug-report text for a diff parser inconsistency.

    The message contains the parso version, the unified diff between
    ``old_lines`` and ``new_lines`` and the unified diff between
    ``new_lines`` and the code that ``module`` currently produces.
    """
    current_lines = split_lines(module.get_code(), keepends=True)
    import parso

    old_new_text = ''.join(difflib.unified_diff(old_lines, new_lines))
    current_text = ''.join(difflib.unified_diff(new_lines, current_lines))
    template = (
        "There's an issue with the diff parser. Please "
        "report (parso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s"
    )
    return template % (parso.__version__, old_new_text, current_text)
|
||||||
|
|
||||||
|
|
||||||
def _get_last_line(node_or_leaf):
|
def _get_last_line(node_or_leaf):
|
||||||
@@ -27,13 +98,21 @@ def _get_last_line(node_or_leaf):
|
|||||||
return last_leaf.end_pos[0]
|
return last_leaf.end_pos[0]
|
||||||
|
|
||||||
|
|
||||||
|
def _skip_dedent_error_leaves(leaf):
|
||||||
|
while leaf is not None and leaf.type == 'error_leaf' and leaf.token_type == 'DEDENT':
|
||||||
|
leaf = leaf.get_previous_leaf()
|
||||||
|
return leaf
|
||||||
|
|
||||||
|
|
||||||
def _ends_with_newline(leaf, suffix=''):
|
def _ends_with_newline(leaf, suffix=''):
|
||||||
|
leaf = _skip_dedent_error_leaves(leaf)
|
||||||
|
|
||||||
if leaf.type == 'error_leaf':
|
if leaf.type == 'error_leaf':
|
||||||
typ = leaf.original_type
|
typ = leaf.token_type.lower()
|
||||||
else:
|
else:
|
||||||
typ = leaf.type
|
typ = leaf.type
|
||||||
|
|
||||||
return typ == 'newline' or suffix.endswith('\n')
|
return typ == 'newline' or suffix.endswith('\n') or suffix.endswith('\r')
|
||||||
|
|
||||||
|
|
||||||
def _flows_finished(pgen_grammar, stack):
|
def _flows_finished(pgen_grammar, stack):
|
||||||
@@ -41,32 +120,45 @@ def _flows_finished(pgen_grammar, stack):
|
|||||||
if, while, for and try might not be finished, because another part might
|
if, while, for and try might not be finished, because another part might
|
||||||
still be parsed.
|
still be parsed.
|
||||||
"""
|
"""
|
||||||
for dfa, newstate, (symbol_number, nodes) in stack:
|
for stack_node in stack:
|
||||||
if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
|
if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
|
||||||
'for_stmt', 'try_stmt'):
|
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def suite_or_file_input_is_valid(pgen_grammar, stack):
|
def _func_or_class_has_suite(node):
|
||||||
|
if node.type == 'decorated':
|
||||||
|
node = node.children[-1]
|
||||||
|
if node.type in ('async_funcdef', 'async_stmt'):
|
||||||
|
node = node.children[-1]
|
||||||
|
return node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite'
|
||||||
|
|
||||||
|
|
||||||
|
def _suite_or_file_input_is_valid(pgen_grammar, stack):
|
||||||
if not _flows_finished(pgen_grammar, stack):
|
if not _flows_finished(pgen_grammar, stack):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
for dfa, newstate, (symbol_number, nodes) in reversed(stack):
|
for stack_node in reversed(stack):
|
||||||
if pgen_grammar.number2symbol[symbol_number] == 'suite':
|
if stack_node.nonterminal == 'decorator':
|
||||||
|
# A decorator is only valid with the upcoming function.
|
||||||
|
return False
|
||||||
|
|
||||||
|
if stack_node.nonterminal == 'suite':
|
||||||
# If only newline is in the suite, the suite is not valid, yet.
|
# If only newline is in the suite, the suite is not valid, yet.
|
||||||
return len(nodes) > 1
|
return len(stack_node.nodes) > 1
|
||||||
# Not reaching a suite means that we're dealing with file_input levels
|
# Not reaching a suite means that we're dealing with file_input levels
|
||||||
# where there's no need for a valid statement in it. It can also be empty.
|
# where there's no need for a valid statement in it. It can also be empty.
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def _is_flow_node(node):
|
def _is_flow_node(node):
|
||||||
|
if node.type == 'async_stmt':
|
||||||
|
node = node.children[1]
|
||||||
try:
|
try:
|
||||||
value = node.children[0].value
|
value = node.children[0].value
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
return False
|
return False
|
||||||
return value in ('if', 'for', 'while', 'try')
|
return value in ('if', 'for', 'while', 'try', 'with')
|
||||||
|
|
||||||
|
|
||||||
class _PositionUpdatingFinished(Exception):
|
class _PositionUpdatingFinished(Exception):
|
||||||
@@ -100,7 +192,7 @@ class DiffParser(object):
|
|||||||
self._copy_count = 0
|
self._copy_count = 0
|
||||||
self._parser_count = 0
|
self._parser_count = 0
|
||||||
|
|
||||||
self._nodes_stack = _NodesStack(self._module)
|
self._nodes_tree = _NodesTree(self._module)
|
||||||
|
|
||||||
def update(self, old_lines, new_lines):
|
def update(self, old_lines, new_lines):
|
||||||
'''
|
'''
|
||||||
@@ -129,11 +221,10 @@ class DiffParser(object):
|
|||||||
line_length = len(new_lines)
|
line_length = len(new_lines)
|
||||||
sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
|
sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
|
||||||
opcodes = sm.get_opcodes()
|
opcodes = sm.get_opcodes()
|
||||||
LOG.debug('diff parser calculated')
|
LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))
|
||||||
LOG.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
|
|
||||||
|
|
||||||
for operation, i1, i2, j1, j2 in opcodes:
|
for operation, i1, i2, j1, j2 in opcodes:
|
||||||
LOG.debug('diff %s old[%s:%s] new[%s:%s]',
|
LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
|
||||||
operation, i1 + 1, i2, j1 + 1, j2)
|
operation, i1 + 1, i2, j1 + 1, j2)
|
||||||
|
|
||||||
if j2 == line_length and new_lines[-1] == '':
|
if j2 == line_length and new_lines[-1] == '':
|
||||||
@@ -152,48 +243,47 @@ class DiffParser(object):
|
|||||||
|
|
||||||
# With this action all change will finally be applied and we have a
|
# With this action all change will finally be applied and we have a
|
||||||
# changed module.
|
# changed module.
|
||||||
self._nodes_stack.close()
|
self._nodes_tree.close()
|
||||||
|
|
||||||
|
if DEBUG_DIFF_PARSER:
|
||||||
|
# If there is reasonable suspicion that the diff parser is not
|
||||||
|
# behaving well, this should be enabled.
|
||||||
|
try:
|
||||||
|
assert self._module.get_code() == ''.join(new_lines)
|
||||||
|
_assert_valid_graph(self._module)
|
||||||
|
except AssertionError:
|
||||||
|
print(_get_debug_error_message(self._module, old_lines, new_lines))
|
||||||
|
raise
|
||||||
|
|
||||||
last_pos = self._module.end_pos[0]
|
last_pos = self._module.end_pos[0]
|
||||||
if last_pos != line_length:
|
if last_pos != line_length:
|
||||||
current_lines = split_lines(self._module.get_code(), keepends=True)
|
|
||||||
diff = difflib.unified_diff(current_lines, new_lines)
|
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"There's an issue (%s != %s) with the diff parser. Please report:\n%s"
|
('(%s != %s) ' % (last_pos, line_length))
|
||||||
% (last_pos, line_length, ''.join(diff))
|
+ _get_debug_error_message(self._module, old_lines, new_lines)
|
||||||
)
|
)
|
||||||
|
|
||||||
LOG.debug('diff parser end')
|
LOG.debug('diff parser end')
|
||||||
return self._module
|
return self._module
|
||||||
|
|
||||||
def _enabled_debugging(self, old_lines, lines_new):
|
def _enabled_debugging(self, old_lines, lines_new):
|
||||||
if self._module.get_code() != ''.join(lines_new):
|
if self._module.get_code() != ''.join(lines_new):
|
||||||
LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
|
LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new))
|
||||||
''.join(lines_new))
|
|
||||||
|
|
||||||
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
|
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
|
||||||
copied_nodes = [None]
|
|
||||||
|
|
||||||
last_until_line = -1
|
last_until_line = -1
|
||||||
while until_line_new > self._nodes_stack.parsed_until_line:
|
while until_line_new > self._nodes_tree.parsed_until_line:
|
||||||
parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
|
parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
|
||||||
line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
|
line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
|
||||||
if line_stmt is None:
|
if line_stmt is None:
|
||||||
# Parse 1 line at least. We don't need more, because we just
|
# Parse 1 line at least. We don't need more, because we just
|
||||||
# want to get into a state where the old parser has statements
|
# want to get into a state where the old parser has statements
|
||||||
# again that can be copied (e.g. not lines within parentheses).
|
# again that can be copied (e.g. not lines within parentheses).
|
||||||
self._parse(self._nodes_stack.parsed_until_line + 1)
|
self._parse(self._nodes_tree.parsed_until_line + 1)
|
||||||
elif not copied_nodes:
|
|
||||||
# We have copied as much as possible (but definitely not too
|
|
||||||
# much). Therefore we just parse the rest.
|
|
||||||
# We might not reach the end, because there's a statement
|
|
||||||
# that is not finished.
|
|
||||||
self._parse(until_line_new)
|
|
||||||
else:
|
else:
|
||||||
p_children = line_stmt.parent.children
|
p_children = line_stmt.parent.children
|
||||||
index = p_children.index(line_stmt)
|
index = p_children.index(line_stmt)
|
||||||
|
|
||||||
copied_nodes = self._nodes_stack.copy_nodes(
|
from_ = self._nodes_tree.parsed_until_line + 1
|
||||||
|
copied_nodes = self._nodes_tree.copy_nodes(
|
||||||
p_children[index:],
|
p_children[index:],
|
||||||
until_line_old,
|
until_line_old,
|
||||||
line_offset
|
line_offset
|
||||||
@@ -202,15 +292,19 @@ class DiffParser(object):
|
|||||||
if copied_nodes:
|
if copied_nodes:
|
||||||
self._copy_count += 1
|
self._copy_count += 1
|
||||||
|
|
||||||
from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
|
to = self._nodes_tree.parsed_until_line
|
||||||
to = self._nodes_stack.parsed_until_line
|
|
||||||
|
|
||||||
LOG.debug('diff actually copy %s to %s', from_, to)
|
LOG.debug('copy old[%s:%s] new[%s:%s]',
|
||||||
|
copied_nodes[0].start_pos[0],
|
||||||
|
copied_nodes[-1].end_pos[0] - 1, from_, to)
|
||||||
|
else:
|
||||||
|
# We have copied as much as possible (but definitely not too
|
||||||
|
# much). Therefore we just parse a bit more.
|
||||||
|
self._parse(self._nodes_tree.parsed_until_line + 1)
|
||||||
# Since there are potential bugs that might loop here endlessly, we
|
# Since there are potential bugs that might loop here endlessly, we
|
||||||
# just stop here.
|
# just stop here.
|
||||||
assert last_until_line != self._nodes_stack.parsed_until_line \
|
assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
|
||||||
or not copied_nodes, last_until_line
|
last_until_line = self._nodes_tree.parsed_until_line
|
||||||
last_until_line = self._nodes_stack.parsed_until_line
|
|
||||||
|
|
||||||
def _get_old_line_stmt(self, old_line):
|
def _get_old_line_stmt(self, old_line):
|
||||||
leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
|
leaf = self._module.get_leaf_for_position((old_line, 0), include_prefixes=True)
|
||||||
@@ -221,46 +315,36 @@ class DiffParser(object):
|
|||||||
node = leaf
|
node = leaf
|
||||||
while node.parent.type not in ('file_input', 'suite'):
|
while node.parent.type not in ('file_input', 'suite'):
|
||||||
node = node.parent
|
node = node.parent
|
||||||
|
|
||||||
|
# Make sure that if only the `else:` line of an if statement is
|
||||||
|
# copied that not the whole thing is going to be copied.
|
||||||
|
if node.start_pos[0] >= old_line:
|
||||||
return node
|
return node
|
||||||
# Must be on the same line. Otherwise we need to parse that bit.
|
# Must be on the same line. Otherwise we need to parse that bit.
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _get_before_insertion_node(self):
|
|
||||||
if self._nodes_stack.is_empty():
|
|
||||||
return None
|
|
||||||
|
|
||||||
line = self._nodes_stack.parsed_until_line + 1
|
|
||||||
node = self._new_module.get_last_leaf()
|
|
||||||
while True:
|
|
||||||
parent = node.parent
|
|
||||||
if parent.type in ('suite', 'file_input'):
|
|
||||||
assert node.end_pos[0] <= line
|
|
||||||
assert node.end_pos[1] == 0 or '\n' in self._prefix
|
|
||||||
return node
|
|
||||||
node = parent
|
|
||||||
|
|
||||||
def _parse(self, until_line):
|
def _parse(self, until_line):
|
||||||
"""
|
"""
|
||||||
Parses at least until the given line, but might just parse more until a
|
Parses at least until the given line, but might just parse more until a
|
||||||
valid state is reached.
|
valid state is reached.
|
||||||
"""
|
"""
|
||||||
last_until_line = 0
|
last_until_line = 0
|
||||||
while until_line > self._nodes_stack.parsed_until_line:
|
while until_line > self._nodes_tree.parsed_until_line:
|
||||||
node = self._try_parse_part(until_line)
|
node = self._try_parse_part(until_line)
|
||||||
nodes = node.children
|
nodes = node.children
|
||||||
|
|
||||||
self._nodes_stack.add_parsed_nodes(nodes)
|
self._nodes_tree.add_parsed_nodes(nodes)
|
||||||
LOG.debug(
|
LOG.debug(
|
||||||
'parse_part from %s to %s (to %s in part parser)',
|
'parse_part from %s to %s (to %s in part parser)',
|
||||||
nodes[0].get_start_pos_of_prefix()[0],
|
nodes[0].get_start_pos_of_prefix()[0],
|
||||||
self._nodes_stack.parsed_until_line,
|
self._nodes_tree.parsed_until_line,
|
||||||
node.end_pos[0] - 1
|
node.end_pos[0] - 1
|
||||||
)
|
)
|
||||||
# Since the tokenizer sometimes has bugs, we cannot be sure that
|
# Since the tokenizer sometimes has bugs, we cannot be sure that
|
||||||
# this loop terminates. Therefore assert that there's always a
|
# this loop terminates. Therefore assert that there's always a
|
||||||
# change.
|
# change.
|
||||||
assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line
|
assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
|
||||||
last_until_line = self._nodes_stack.parsed_until_line
|
last_until_line = self._nodes_tree.parsed_until_line
|
||||||
|
|
||||||
def _try_parse_part(self, until_line):
|
def _try_parse_part(self, until_line):
|
||||||
"""
|
"""
|
||||||
@@ -271,9 +355,8 @@ class DiffParser(object):
|
|||||||
self._parser_count += 1
|
self._parser_count += 1
|
||||||
# TODO speed up, shouldn't copy the whole list all the time.
|
# TODO speed up, shouldn't copy the whole list all the time.
|
||||||
# memoryview?
|
# memoryview?
|
||||||
parsed_until_line = self._nodes_stack.parsed_until_line
|
parsed_until_line = self._nodes_tree.parsed_until_line
|
||||||
lines_after = self._parser_lines_new[parsed_until_line:]
|
lines_after = self._parser_lines_new[parsed_until_line:]
|
||||||
#print('parse_content', parsed_until_line, lines_after, until_line)
|
|
||||||
tokens = self._diff_tokenize(
|
tokens = self._diff_tokenize(
|
||||||
lines_after,
|
lines_after,
|
||||||
until_line,
|
until_line,
|
||||||
@@ -290,10 +373,10 @@ class DiffParser(object):
|
|||||||
omitted_first_indent = False
|
omitted_first_indent = False
|
||||||
indents = []
|
indents = []
|
||||||
tokens = self._tokenizer(lines, (1, 0))
|
tokens = self._tokenizer(lines, (1, 0))
|
||||||
stack = self._active_parser.pgen_parser.stack
|
stack = self._active_parser.stack
|
||||||
for typ, string, start_pos, prefix in tokens:
|
for typ, string, start_pos, prefix in tokens:
|
||||||
start_pos = start_pos[0] + line_offset, start_pos[1]
|
start_pos = start_pos[0] + line_offset, start_pos[1]
|
||||||
if typ == INDENT:
|
if typ == PythonTokenTypes.INDENT:
|
||||||
indents.append(start_pos[1])
|
indents.append(start_pos[1])
|
||||||
if is_first_token:
|
if is_first_token:
|
||||||
omitted_first_indent = True
|
omitted_first_indent = True
|
||||||
@@ -306,29 +389,36 @@ class DiffParser(object):
|
|||||||
|
|
||||||
# In case of omitted_first_indent, it might not be dedented fully.
|
# In case of omitted_first_indent, it might not be dedented fully.
|
||||||
# However this is a sign for us that a dedent happened.
|
# However this is a sign for us that a dedent happened.
|
||||||
if typ == DEDENT \
|
if typ == PythonTokenTypes.DEDENT \
|
||||||
or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1:
|
or typ == PythonTokenTypes.ERROR_DEDENT \
|
||||||
|
and omitted_first_indent and len(indents) == 1:
|
||||||
indents.pop()
|
indents.pop()
|
||||||
if omitted_first_indent and not indents:
|
if omitted_first_indent and not indents:
|
||||||
# We are done here, only thing that can come now is an
|
# We are done here, only thing that can come now is an
|
||||||
# endmarker or another dedented code block.
|
# endmarker or another dedented code block.
|
||||||
typ, string, start_pos, prefix = next(tokens)
|
typ, string, start_pos, prefix = next(tokens)
|
||||||
if '\n' in prefix:
|
if '\n' in prefix or '\r' in prefix:
|
||||||
prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix)
|
prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
|
||||||
else:
|
else:
|
||||||
|
assert start_pos[1] >= len(prefix), repr(prefix)
|
||||||
|
if start_pos[1] - len(prefix) == 0:
|
||||||
prefix = ''
|
prefix = ''
|
||||||
yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix)
|
yield PythonToken(
|
||||||
|
PythonTokenTypes.ENDMARKER, '',
|
||||||
|
(start_pos[0] + line_offset, 0),
|
||||||
|
prefix
|
||||||
|
)
|
||||||
break
|
break
|
||||||
elif typ == NEWLINE and start_pos[0] >= until_line:
|
elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
|
||||||
yield PythonToken(typ, string, start_pos, prefix)
|
yield PythonToken(typ, string, start_pos, prefix)
|
||||||
# Check if the parser is actually in a valid suite state.
|
# Check if the parser is actually in a valid suite state.
|
||||||
if suite_or_file_input_is_valid(self._pgen_grammar, stack):
|
if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
|
||||||
start_pos = start_pos[0] + 1, 0
|
start_pos = start_pos[0] + 1, 0
|
||||||
while len(indents) > int(omitted_first_indent):
|
while len(indents) > int(omitted_first_indent):
|
||||||
indents.pop()
|
indents.pop()
|
||||||
yield PythonToken(DEDENT, '', start_pos, '')
|
yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')
|
||||||
|
|
||||||
yield PythonToken(ENDMARKER, '', start_pos, '')
|
yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
@@ -336,17 +426,23 @@ class DiffParser(object):
|
|||||||
yield PythonToken(typ, string, start_pos, prefix)
|
yield PythonToken(typ, string, start_pos, prefix)
|
||||||
|
|
||||||
|
|
||||||
class _NodesStackNode(object):
|
class _NodesTreeNode(object):
|
||||||
ChildrenGroup = namedtuple('ChildrenGroup', 'children line_offset last_line_offset_leaf')
|
_ChildrenGroup = namedtuple('_ChildrenGroup', 'prefix children line_offset last_line_offset_leaf')
|
||||||
|
|
||||||
def __init__(self, tree_node, parent=None):
|
def __init__(self, tree_node, parent=None):
|
||||||
self.tree_node = tree_node
|
self.tree_node = tree_node
|
||||||
self.children_groups = []
|
self._children_groups = []
|
||||||
self.parent = parent
|
self.parent = parent
|
||||||
|
self._node_children = []
|
||||||
|
|
||||||
def close(self):
|
def finish(self):
|
||||||
children = []
|
children = []
|
||||||
for children_part, line_offset, last_line_offset_leaf in self.children_groups:
|
for prefix, children_part, line_offset, last_line_offset_leaf in self._children_groups:
|
||||||
|
first_leaf = _get_next_leaf_if_indentation(
|
||||||
|
children_part[0].get_first_leaf()
|
||||||
|
)
|
||||||
|
|
||||||
|
first_leaf.prefix = prefix + first_leaf.prefix
|
||||||
if line_offset != 0:
|
if line_offset != 0:
|
||||||
try:
|
try:
|
||||||
_update_positions(
|
_update_positions(
|
||||||
@@ -359,59 +455,61 @@ class _NodesStackNode(object):
|
|||||||
for node in children:
|
for node in children:
|
||||||
node.parent = self.tree_node
|
node.parent = self.tree_node
|
||||||
|
|
||||||
def add(self, children, line_offset=0, last_line_offset_leaf=None):
|
for node_child in self._node_children:
|
||||||
group = self.ChildrenGroup(children, line_offset, last_line_offset_leaf)
|
node_child.finish()
|
||||||
self.children_groups.append(group)
|
|
||||||
|
def add_child_node(self, child_node):
|
||||||
|
self._node_children.append(child_node)
|
||||||
|
|
||||||
|
def add_tree_nodes(self, prefix, children, line_offset=0, last_line_offset_leaf=None):
|
||||||
|
if last_line_offset_leaf is None:
|
||||||
|
last_line_offset_leaf = children[-1].get_last_leaf()
|
||||||
|
group = self._ChildrenGroup(prefix, children, line_offset, last_line_offset_leaf)
|
||||||
|
self._children_groups.append(group)
|
||||||
|
|
||||||
def get_last_line(self, suffix):
|
def get_last_line(self, suffix):
|
||||||
line = 0
|
line = 0
|
||||||
if self.children_groups:
|
if self._children_groups:
|
||||||
children_group = self.children_groups[-1]
|
children_group = self._children_groups[-1]
|
||||||
last_leaf = children_group.children[-1].get_last_leaf()
|
last_leaf = _get_previous_leaf_if_indentation(
|
||||||
line = last_leaf.end_pos[0]
|
children_group.last_line_offset_leaf
|
||||||
|
)
|
||||||
|
|
||||||
# Calculate the line offsets
|
line = last_leaf.end_pos[0] + children_group.line_offset
|
||||||
offset = children_group.line_offset
|
|
||||||
if offset:
|
|
||||||
# In case the line_offset is not applied to this specific leaf,
|
|
||||||
# just ignore it.
|
|
||||||
if last_leaf.line <= children_group.last_line_offset_leaf.line:
|
|
||||||
line += children_group.line_offset
|
|
||||||
|
|
||||||
# Newlines end on the next line, which means that they would cover
|
# Newlines end on the next line, which means that they would cover
|
||||||
# the next line. That line is not fully parsed at this point.
|
# the next line. That line is not fully parsed at this point.
|
||||||
if _ends_with_newline(last_leaf, suffix):
|
if _ends_with_newline(last_leaf, suffix):
|
||||||
line -= 1
|
line -= 1
|
||||||
line += suffix.count('\n')
|
line += len(split_lines(suffix)) - 1
|
||||||
if suffix and not suffix.endswith('\n'):
|
|
||||||
|
if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
|
||||||
# This is the end of a file (that doesn't end with a newline).
|
# This is the end of a file (that doesn't end with a newline).
|
||||||
line += 1
|
line += 1
|
||||||
|
|
||||||
|
if self._node_children:
|
||||||
|
return max(line, self._node_children[-1].get_last_line(suffix))
|
||||||
return line
|
return line
|
||||||
|
|
||||||
|
|
||||||
class _NodesStack(object):
|
class _NodesTree(object):
|
||||||
endmarker_type = 'endmarker'
|
|
||||||
|
|
||||||
def __init__(self, module):
|
def __init__(self, module):
|
||||||
# Top of stack
|
self._base_node = _NodesTreeNode(module)
|
||||||
self._tos = self._base_node = _NodesStackNode(module)
|
self._working_stack = [self._base_node]
|
||||||
self._module = module
|
self._module = module
|
||||||
self._last_prefix = ''
|
self._prefix_remainder = ''
|
||||||
self.prefix = ''
|
self.prefix = ''
|
||||||
|
|
||||||
def is_empty(self):
|
|
||||||
return not self._base_node.children
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def parsed_until_line(self):
|
def parsed_until_line(self):
|
||||||
return self._tos.get_last_line(self.prefix)
|
return self._working_stack[-1].get_last_line(self.prefix)
|
||||||
|
|
||||||
def _get_insertion_node(self, indentation_node):
|
def _get_insertion_node(self, indentation_node):
|
||||||
indentation = indentation_node.start_pos[1]
|
indentation = indentation_node.start_pos[1]
|
||||||
|
|
||||||
# find insertion node
|
# find insertion node
|
||||||
node = self._tos
|
|
||||||
while True:
|
while True:
|
||||||
|
node = self._working_stack[-1]
|
||||||
tree_node = node.tree_node
|
tree_node = node.tree_node
|
||||||
if tree_node.type == 'suite':
|
if tree_node.type == 'suite':
|
||||||
# A suite starts with NEWLINE, ...
|
# A suite starts with NEWLINE, ...
|
||||||
@@ -426,53 +524,57 @@ class _NodesStack(object):
|
|||||||
elif tree_node.type == 'file_input':
|
elif tree_node.type == 'file_input':
|
||||||
return node
|
return node
|
||||||
|
|
||||||
node = self._close_tos()
|
self._working_stack.pop()
|
||||||
|
|
||||||
def _close_tos(self):
|
|
||||||
self._tos.close()
|
|
||||||
self._tos = self._tos.parent
|
|
||||||
return self._tos
|
|
||||||
|
|
||||||
def add_parsed_nodes(self, tree_nodes):
|
def add_parsed_nodes(self, tree_nodes):
|
||||||
|
old_prefix = self.prefix
|
||||||
tree_nodes = self._remove_endmarker(tree_nodes)
|
tree_nodes = self._remove_endmarker(tree_nodes)
|
||||||
if not tree_nodes:
|
if not tree_nodes:
|
||||||
|
self.prefix = old_prefix + self.prefix
|
||||||
return
|
return
|
||||||
|
|
||||||
assert tree_nodes[0].type != 'newline'
|
assert tree_nodes[0].type != 'newline'
|
||||||
|
|
||||||
node = self._get_insertion_node(tree_nodes[0])
|
node = self._get_insertion_node(tree_nodes[0])
|
||||||
assert node.tree_node.type in ('suite', 'file_input')
|
assert node.tree_node.type in ('suite', 'file_input')
|
||||||
node.add(tree_nodes)
|
node.add_tree_nodes(old_prefix, tree_nodes)
|
||||||
|
# tos = Top of stack
|
||||||
self._update_tos(tree_nodes[-1])
|
self._update_tos(tree_nodes[-1])
|
||||||
|
|
||||||
|
def _update_tos(self, tree_node):
|
||||||
|
if tree_node.type in ('suite', 'file_input'):
|
||||||
|
new_tos = _NodesTreeNode(tree_node)
|
||||||
|
new_tos.add_tree_nodes('', list(tree_node.children))
|
||||||
|
|
||||||
|
self._working_stack[-1].add_child_node(new_tos)
|
||||||
|
self._working_stack.append(new_tos)
|
||||||
|
|
||||||
|
self._update_tos(tree_node.children[-1])
|
||||||
|
elif _func_or_class_has_suite(tree_node):
|
||||||
|
self._update_tos(tree_node.children[-1])
|
||||||
|
|
||||||
def _remove_endmarker(self, tree_nodes):
|
def _remove_endmarker(self, tree_nodes):
|
||||||
"""
|
"""
|
||||||
Helps cleaning up the tree nodes that get inserted.
|
Helps cleaning up the tree nodes that get inserted.
|
||||||
"""
|
"""
|
||||||
last_leaf = tree_nodes[-1].get_last_leaf()
|
last_leaf = tree_nodes[-1].get_last_leaf()
|
||||||
is_endmarker = last_leaf.type == self.endmarker_type
|
is_endmarker = last_leaf.type == 'endmarker'
|
||||||
self._last_prefix = ''
|
self._prefix_remainder = ''
|
||||||
if is_endmarker:
|
if is_endmarker:
|
||||||
try:
|
separation = max(last_leaf.prefix.rfind('\n'), last_leaf.prefix.rfind('\r'))
|
||||||
separation = last_leaf.prefix.rindex('\n')
|
if separation > -1:
|
||||||
except ValueError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
# Remove the whitespace part of the prefix after a newline.
|
# Remove the whitespace part of the prefix after a newline.
|
||||||
# That is not relevant if parentheses were opened. Always parse
|
# That is not relevant if parentheses were opened. Always parse
|
||||||
# until the end of a line.
|
# until the end of a line.
|
||||||
last_leaf.prefix, self._last_prefix = \
|
last_leaf.prefix, self._prefix_remainder = \
|
||||||
last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
|
last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
|
||||||
|
|
||||||
first_leaf = tree_nodes[0].get_first_leaf()
|
|
||||||
first_leaf.prefix = self.prefix + first_leaf.prefix
|
|
||||||
self.prefix = ''
|
self.prefix = ''
|
||||||
|
|
||||||
if is_endmarker:
|
if is_endmarker:
|
||||||
self.prefix = last_leaf.prefix
|
self.prefix = last_leaf.prefix
|
||||||
|
|
||||||
tree_nodes = tree_nodes[:-1]
|
tree_nodes = tree_nodes[:-1]
|
||||||
|
|
||||||
return tree_nodes
|
return tree_nodes
|
||||||
|
|
||||||
def copy_nodes(self, tree_nodes, until_line, line_offset):
|
def copy_nodes(self, tree_nodes, until_line, line_offset):
|
||||||
@@ -481,55 +583,76 @@ class _NodesStack(object):
|
|||||||
|
|
||||||
Returns the number of tree nodes that were copied.
|
Returns the number of tree nodes that were copied.
|
||||||
"""
|
"""
|
||||||
tos = self._get_insertion_node(tree_nodes[0])
|
if tree_nodes[0].type in ('error_leaf', 'error_node'):
|
||||||
|
# Avoid copying errors in the beginning. Can lead to a lot of
|
||||||
|
# issues.
|
||||||
|
return []
|
||||||
|
|
||||||
new_nodes, self._tos = self._copy_nodes(tos, tree_nodes, until_line, line_offset)
|
self._get_insertion_node(tree_nodes[0])
|
||||||
|
|
||||||
|
new_nodes, self._working_stack, self.prefix = self._copy_nodes(
|
||||||
|
list(self._working_stack),
|
||||||
|
tree_nodes,
|
||||||
|
until_line,
|
||||||
|
line_offset,
|
||||||
|
self.prefix,
|
||||||
|
)
|
||||||
return new_nodes
|
return new_nodes
|
||||||
|
|
||||||
def _copy_nodes(self, tos, nodes, until_line, line_offset):
|
def _copy_nodes(self, working_stack, nodes, until_line, line_offset, prefix=''):
|
||||||
new_nodes = []
|
new_nodes = []
|
||||||
|
|
||||||
new_tos = tos
|
new_prefix = ''
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
if node.type == 'endmarker':
|
if node.start_pos[0] > until_line:
|
||||||
# Endmarkers just distort all the checks below. Remove them.
|
|
||||||
break
|
break
|
||||||
|
|
||||||
if node.start_pos[0] > until_line:
|
if node.type == 'endmarker':
|
||||||
|
break
|
||||||
|
|
||||||
|
if node.type == 'error_leaf' and node.token_type in ('DEDENT', 'ERROR_DEDENT'):
|
||||||
break
|
break
|
||||||
# TODO this check might take a bit of time for large files. We
|
# TODO this check might take a bit of time for large files. We
|
||||||
# might want to change this to do more intelligent guessing or
|
# might want to change this to do more intelligent guessing or
|
||||||
# binary search.
|
# binary search.
|
||||||
if _get_last_line(node) > until_line:
|
if _get_last_line(node) > until_line:
|
||||||
# We can split up functions and classes later.
|
# We can split up functions and classes later.
|
||||||
if node.type in ('classdef', 'funcdef') and node.children[-1].type == 'suite':
|
if _func_or_class_has_suite(node):
|
||||||
new_nodes.append(node)
|
new_nodes.append(node)
|
||||||
break
|
break
|
||||||
|
|
||||||
new_nodes.append(node)
|
new_nodes.append(node)
|
||||||
|
|
||||||
if not new_nodes:
|
if not new_nodes:
|
||||||
return [], tos
|
return [], working_stack, prefix
|
||||||
|
|
||||||
|
tos = working_stack[-1]
|
||||||
last_node = new_nodes[-1]
|
last_node = new_nodes[-1]
|
||||||
line_offset_index = -1
|
had_valid_suite_last = False
|
||||||
if last_node.type in ('classdef', 'funcdef'):
|
if _func_or_class_has_suite(last_node):
|
||||||
suite = last_node.children[-1]
|
suite = last_node
|
||||||
if suite.type == 'suite':
|
while suite.type != 'suite':
|
||||||
suite_tos = _NodesStackNode(suite)
|
suite = suite.children[-1]
|
||||||
|
|
||||||
|
suite_tos = _NodesTreeNode(suite)
|
||||||
# Don't need to pass line_offset here, it's already done by the
|
# Don't need to pass line_offset here, it's already done by the
|
||||||
# parent.
|
# parent.
|
||||||
suite_nodes, recursive_tos = self._copy_nodes(
|
suite_nodes, new_working_stack, new_prefix = self._copy_nodes(
|
||||||
suite_tos, suite.children, until_line, line_offset)
|
working_stack + [suite_tos], suite.children, until_line, line_offset
|
||||||
|
)
|
||||||
if len(suite_nodes) < 2:
|
if len(suite_nodes) < 2:
|
||||||
# A suite only with newline is not valid.
|
# A suite only with newline is not valid.
|
||||||
new_nodes.pop()
|
new_nodes.pop()
|
||||||
|
new_prefix = ''
|
||||||
else:
|
else:
|
||||||
suite_tos.parent = tos
|
assert new_nodes
|
||||||
new_tos = recursive_tos
|
tos.add_child_node(suite_tos)
|
||||||
line_offset_index = -2
|
working_stack = new_working_stack
|
||||||
|
had_valid_suite_last = True
|
||||||
|
|
||||||
elif (new_nodes[-1].type in ('error_leaf', 'error_node') or
|
if new_nodes:
|
||||||
|
last_node = new_nodes[-1]
|
||||||
|
if (last_node.type in ('error_leaf', 'error_node') or
|
||||||
_is_flow_node(new_nodes[-1])):
|
_is_flow_node(new_nodes[-1])):
|
||||||
# Error leafs/nodes don't have a defined start/end. Error
|
# Error leafs/nodes don't have a defined start/end. Error
|
||||||
# nodes might not end with a newline (e.g. if there's an
|
# nodes might not end with a newline (e.g. if there's an
|
||||||
@@ -538,6 +661,7 @@ class _NodesStack(object):
|
|||||||
# If we copy flows at the end, they might be continued
|
# If we copy flows at the end, they might be continued
|
||||||
# after the copy limit (in the new parser).
|
# after the copy limit (in the new parser).
|
||||||
# In this while loop we try to remove until we find a newline.
|
# In this while loop we try to remove until we find a newline.
|
||||||
|
new_prefix = ''
|
||||||
new_nodes.pop()
|
new_nodes.pop()
|
||||||
while new_nodes:
|
while new_nodes:
|
||||||
last_node = new_nodes[-1]
|
last_node = new_nodes[-1]
|
||||||
@@ -546,34 +670,41 @@ class _NodesStack(object):
|
|||||||
new_nodes.pop()
|
new_nodes.pop()
|
||||||
|
|
||||||
if new_nodes:
|
if new_nodes:
|
||||||
try:
|
if not _ends_with_newline(new_nodes[-1].get_last_leaf()) and not had_valid_suite_last:
|
||||||
last_line_offset_leaf = new_nodes[line_offset_index].get_last_leaf()
|
p = new_nodes[-1].get_next_leaf().prefix
|
||||||
except IndexError:
|
# We are not allowed to remove the newline at the end of the
|
||||||
line_offset = 0
|
# line, otherwise it's going to be missing. This happens e.g.
|
||||||
# In this case we don't have to calculate an offset, because
|
# if a bracket is around before that moves newlines to
|
||||||
# there's no children to be managed.
|
# prefixes.
|
||||||
last_line_offset_leaf = None
|
new_prefix = split_lines(p, keepends=True)[0]
|
||||||
tos.add(new_nodes, line_offset, last_line_offset_leaf)
|
|
||||||
return new_nodes, new_tos
|
|
||||||
|
|
||||||
def _update_tos(self, tree_node):
|
if had_valid_suite_last:
|
||||||
if tree_node.type in ('suite', 'file_input'):
|
last = new_nodes[-1]
|
||||||
self._tos = _NodesStackNode(tree_node, self._tos)
|
if last.type == 'decorated':
|
||||||
self._tos.add(list(tree_node.children))
|
last = last.children[-1]
|
||||||
self._update_tos(tree_node.children[-1])
|
if last.type in ('async_funcdef', 'async_stmt'):
|
||||||
elif tree_node.type in ('classdef', 'funcdef'):
|
last = last.children[-1]
|
||||||
self._update_tos(tree_node.children[-1])
|
last_line_offset_leaf = last.children[-2].get_last_leaf()
|
||||||
|
assert last_line_offset_leaf == ':'
|
||||||
|
else:
|
||||||
|
last_line_offset_leaf = new_nodes[-1].get_last_leaf()
|
||||||
|
tos.add_tree_nodes(prefix, new_nodes, line_offset, last_line_offset_leaf)
|
||||||
|
prefix = new_prefix
|
||||||
|
self._prefix_remainder = ''
|
||||||
|
|
||||||
|
return new_nodes, working_stack, prefix
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
while self._tos is not None:
|
self._base_node.finish()
|
||||||
self._close_tos()
|
|
||||||
|
|
||||||
# Add an endmarker.
|
# Add an endmarker.
|
||||||
try:
|
try:
|
||||||
last_leaf = self._module.get_last_leaf()
|
last_leaf = self._module.get_last_leaf()
|
||||||
end_pos = list(last_leaf.end_pos)
|
|
||||||
except IndexError:
|
except IndexError:
|
||||||
end_pos = [1, 0]
|
end_pos = [1, 0]
|
||||||
|
else:
|
||||||
|
last_leaf = _skip_dedent_error_leaves(last_leaf)
|
||||||
|
end_pos = list(last_leaf.end_pos)
|
||||||
lines = split_lines(self.prefix)
|
lines = split_lines(self.prefix)
|
||||||
assert len(lines) > 0
|
assert len(lines) > 0
|
||||||
if len(lines) == 1:
|
if len(lines) == 1:
|
||||||
@@ -582,6 +713,6 @@ class _NodesStack(object):
|
|||||||
end_pos[0] += len(lines) - 1
|
end_pos[0] += len(lines) - 1
|
||||||
end_pos[1] = len(lines[-1])
|
end_pos[1] = len(lines[-1])
|
||||||
|
|
||||||
endmarker = EndMarker('', tuple(end_pos), self.prefix + self._last_prefix)
|
endmarker = EndMarker('', tuple(end_pos), self.prefix + self._prefix_remainder)
|
||||||
endmarker.parent = self._module
|
endmarker.parent = self._module
|
||||||
self._module.children.append(endmarker)
|
self._module.children.append(endmarker)
|
||||||
|
|||||||
@@ -306,12 +306,12 @@ class ErrorFinder(Normalizer):
|
|||||||
|
|
||||||
def visit_leaf(self, leaf):
|
def visit_leaf(self, leaf):
|
||||||
if leaf.type == 'error_leaf':
|
if leaf.type == 'error_leaf':
|
||||||
if leaf.original_type in ('indent', 'error_dedent'):
|
if leaf.token_type in ('INDENT', 'ERROR_DEDENT'):
|
||||||
# Indents/Dedents itself never have a prefix. They are just
|
# Indents/Dedents itself never have a prefix. They are just
|
||||||
# "pseudo" tokens that get removed by the syntax tree later.
|
# "pseudo" tokens that get removed by the syntax tree later.
|
||||||
# Therefore in case of an error we also have to check for this.
|
# Therefore in case of an error we also have to check for this.
|
||||||
spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
|
spacing = list(leaf.get_next_leaf()._split_prefix())[-1]
|
||||||
if leaf.original_type == 'indent':
|
if leaf.token_type == 'INDENT':
|
||||||
message = 'unexpected indent'
|
message = 'unexpected indent'
|
||||||
else:
|
else:
|
||||||
message = 'unindent does not match any outer indentation level'
|
message = 'unindent does not match any outer indentation level'
|
||||||
@@ -563,17 +563,21 @@ class _ReturnAndYieldChecks(SyntaxRule):
|
|||||||
and self._normalizer.version == (3, 5):
|
and self._normalizer.version == (3, 5):
|
||||||
self.add_issue(self.get_node(leaf), message=self.message_async_yield)
|
self.add_issue(self.get_node(leaf), message=self.message_async_yield)
|
||||||
|
|
||||||
@ErrorFinder.register_rule(type='atom')
|
|
||||||
|
@ErrorFinder.register_rule(type='strings')
|
||||||
class _BytesAndStringMix(SyntaxRule):
|
class _BytesAndStringMix(SyntaxRule):
|
||||||
# e.g. 's' b''
|
# e.g. 's' b''
|
||||||
message = "cannot mix bytes and nonbytes literals"
|
message = "cannot mix bytes and nonbytes literals"
|
||||||
|
|
||||||
def _is_bytes_literal(self, string):
|
def _is_bytes_literal(self, string):
|
||||||
|
if string.type == 'fstring':
|
||||||
|
return False
|
||||||
return 'b' in string.string_prefix.lower()
|
return 'b' in string.string_prefix.lower()
|
||||||
|
|
||||||
def is_issue(self, node):
|
def is_issue(self, node):
|
||||||
first = node.children[0]
|
first = node.children[0]
|
||||||
if first.type == 'string' and self._normalizer.version >= (3, 0):
|
# In Python 2 it's allowed to mix bytes and unicode.
|
||||||
|
if self._normalizer.version >= (3, 0):
|
||||||
first_is_bytes = self._is_bytes_literal(first)
|
first_is_bytes = self._is_bytes_literal(first)
|
||||||
for string in node.children[1:]:
|
for string in node.children[1:]:
|
||||||
if first_is_bytes != self._is_bytes_literal(string):
|
if first_is_bytes != self._is_bytes_literal(string):
|
||||||
@@ -744,7 +748,12 @@ class _NonlocalModuleLevelRule(SyntaxRule):
|
|||||||
|
|
||||||
@ErrorFinder.register_rule(type='arglist')
|
@ErrorFinder.register_rule(type='arglist')
|
||||||
class _ArglistRule(SyntaxRule):
|
class _ArglistRule(SyntaxRule):
|
||||||
message = "Generator expression must be parenthesized if not sole argument"
|
@property
|
||||||
|
def message(self):
|
||||||
|
if self._normalizer.version < (3, 7):
|
||||||
|
return "Generator expression must be parenthesized if not sole argument"
|
||||||
|
else:
|
||||||
|
return "Generator expression must be parenthesized"
|
||||||
|
|
||||||
def is_issue(self, node):
|
def is_issue(self, node):
|
||||||
first_arg = node.children[0]
|
first_arg = node.children[0]
|
||||||
@@ -837,101 +846,36 @@ class _TryStmtRule(SyntaxRule):
|
|||||||
self.add_issue(default_except, message=self.message)
|
self.add_issue(default_except, message=self.message)
|
||||||
|
|
||||||
|
|
||||||
@ErrorFinder.register_rule(type='string')
|
@ErrorFinder.register_rule(type='fstring')
|
||||||
class _FStringRule(SyntaxRule):
|
class _FStringRule(SyntaxRule):
|
||||||
_fstring_grammar = None
|
_fstring_grammar = None
|
||||||
message_empty = "f-string: empty expression not allowed" # f'{}'
|
|
||||||
message_single_closing = "f-string: single '}' is not allowed" # f'}'
|
|
||||||
message_nested = "f-string: expressions nested too deeply"
|
message_nested = "f-string: expressions nested too deeply"
|
||||||
message_backslash = "f-string expression part cannot include a backslash" # f'{"\"}' or f'{"\\"}'
|
|
||||||
message_comment = "f-string expression part cannot include '#'" # f'{#}'
|
|
||||||
message_unterminated_string = "f-string: unterminated string" # f'{"}'
|
|
||||||
message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
|
message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
|
||||||
message_incomplete = "f-string: expecting '}'" # f'{'
|
|
||||||
message_syntax = "invalid syntax"
|
|
||||||
|
|
||||||
@classmethod
|
def _check_format_spec(self, format_spec, depth):
|
||||||
def _load_grammar(cls):
|
self._check_fstring_contents(format_spec.children[1:], depth)
|
||||||
import parso
|
|
||||||
|
|
||||||
if cls._fstring_grammar is None:
|
def _check_fstring_expr(self, fstring_expr, depth):
|
||||||
cls._fstring_grammar = parso.load_grammar(language='python-f-string')
|
if depth >= 2:
|
||||||
return cls._fstring_grammar
|
self.add_issue(fstring_expr, message=self.message_nested)
|
||||||
|
|
||||||
|
conversion = fstring_expr.children[2]
|
||||||
|
if conversion.type == 'fstring_conversion':
|
||||||
|
name = conversion.children[1]
|
||||||
|
if name.value not in ('s', 'r', 'a'):
|
||||||
|
self.add_issue(name, message=self.message_conversion)
|
||||||
|
|
||||||
|
format_spec = fstring_expr.children[-2]
|
||||||
|
if format_spec.type == 'fstring_format_spec':
|
||||||
|
self._check_format_spec(format_spec, depth + 1)
|
||||||
|
|
||||||
def is_issue(self, fstring):
|
def is_issue(self, fstring):
|
||||||
if 'f' not in fstring.string_prefix.lower():
|
self._check_fstring_contents(fstring.children[1:-1])
|
||||||
return
|
|
||||||
|
|
||||||
parsed = self._load_grammar().parse_leaf(fstring)
|
def _check_fstring_contents(self, children, depth=0):
|
||||||
for child in parsed.children:
|
for fstring_content in children:
|
||||||
if child.type == 'expression':
|
if fstring_content.type == 'fstring_expr':
|
||||||
self._check_expression(child)
|
self._check_fstring_expr(fstring_content, depth)
|
||||||
elif child.type == 'error_node':
|
|
||||||
next_ = child.get_next_leaf()
|
|
||||||
if next_.type == 'error_leaf' and next_.original_type == 'unterminated_string':
|
|
||||||
self.add_issue(next_, message=self.message_unterminated_string)
|
|
||||||
# At this point nothing more is comming except the error
|
|
||||||
# leaf that we've already checked here.
|
|
||||||
break
|
|
||||||
self.add_issue(child, message=self.message_incomplete)
|
|
||||||
elif child.type == 'error_leaf':
|
|
||||||
self.add_issue(child, message=self.message_single_closing)
|
|
||||||
|
|
||||||
def _check_python_expr(self, python_expr):
|
|
||||||
value = python_expr.value
|
|
||||||
if '\\' in value:
|
|
||||||
self.add_issue(python_expr, message=self.message_backslash)
|
|
||||||
return
|
|
||||||
if '#' in value:
|
|
||||||
self.add_issue(python_expr, message=self.message_comment)
|
|
||||||
return
|
|
||||||
if re.match('\s*$', value) is not None:
|
|
||||||
self.add_issue(python_expr, message=self.message_empty)
|
|
||||||
return
|
|
||||||
|
|
||||||
# This is now nested parsing. We parsed the fstring and now
|
|
||||||
# we're parsing Python again.
|
|
||||||
try:
|
|
||||||
# CPython has a bit of a special ways to parse Python code within
|
|
||||||
# f-strings. It wraps the code in brackets to make sure that
|
|
||||||
# whitespace doesn't make problems (indentation/newlines).
|
|
||||||
# Just use that algorithm as well here and adapt start positions.
|
|
||||||
start_pos = python_expr.start_pos
|
|
||||||
start_pos = start_pos[0], start_pos[1] - 1
|
|
||||||
eval_input = self._normalizer.grammar._parse(
|
|
||||||
'(%s)' % value,
|
|
||||||
start_symbol='eval_input',
|
|
||||||
start_pos=start_pos,
|
|
||||||
error_recovery=False
|
|
||||||
)
|
|
||||||
except ParserSyntaxError as e:
|
|
||||||
self.add_issue(e.error_leaf, message=self.message_syntax)
|
|
||||||
return
|
|
||||||
|
|
||||||
issues = self._normalizer.grammar.iter_errors(eval_input)
|
|
||||||
self._normalizer.issues += issues
|
|
||||||
|
|
||||||
def _check_format_spec(self, format_spec):
|
|
||||||
for expression in format_spec.children[1:]:
|
|
||||||
nested_format_spec = expression.children[-2]
|
|
||||||
if nested_format_spec.type == 'format_spec':
|
|
||||||
if len(nested_format_spec.children) > 1:
|
|
||||||
self.add_issue(
|
|
||||||
nested_format_spec.children[1],
|
|
||||||
message=self.message_nested
|
|
||||||
)
|
|
||||||
|
|
||||||
self._check_expression(expression)
|
|
||||||
|
|
||||||
def _check_expression(self, expression):
|
|
||||||
for c in expression.children:
|
|
||||||
if c.type == 'python_expr':
|
|
||||||
self._check_python_expr(c)
|
|
||||||
elif c.type == 'conversion':
|
|
||||||
if c.value not in ('s', 'r', 'a'):
|
|
||||||
self.add_issue(c, message=self.message_conversion)
|
|
||||||
elif c.type == 'format_spec':
|
|
||||||
self._check_format_spec(c)
|
|
||||||
|
|
||||||
|
|
||||||
class _CheckAssignmentRule(SyntaxRule):
|
class _CheckAssignmentRule(SyntaxRule):
|
||||||
@@ -944,7 +888,7 @@ class _CheckAssignmentRule(SyntaxRule):
|
|||||||
first, second = node.children[:2]
|
first, second = node.children[:2]
|
||||||
error = _get_comprehension_type(node)
|
error = _get_comprehension_type(node)
|
||||||
if error is None:
|
if error is None:
|
||||||
if second.type in ('dictorsetmaker', 'string'):
|
if second.type == 'dictorsetmaker':
|
||||||
error = 'literal'
|
error = 'literal'
|
||||||
elif first in ('(', '['):
|
elif first in ('(', '['):
|
||||||
if second.type == 'yield_expr':
|
if second.type == 'yield_expr':
|
||||||
@@ -963,7 +907,7 @@ class _CheckAssignmentRule(SyntaxRule):
|
|||||||
error = 'Ellipsis'
|
error = 'Ellipsis'
|
||||||
elif type_ == 'comparison':
|
elif type_ == 'comparison':
|
||||||
error = 'comparison'
|
error = 'comparison'
|
||||||
elif type_ in ('string', 'number'):
|
elif type_ in ('string', 'number', 'strings'):
|
||||||
error = 'literal'
|
error = 'literal'
|
||||||
elif type_ == 'yield_expr':
|
elif type_ == 'yield_expr':
|
||||||
# This one seems to be a slightly different warning in Python.
|
# This one seems to be a slightly different warning in Python.
|
||||||
|
|||||||
@@ -1,211 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from itertools import count
|
|
||||||
from parso.utils import PythonVersionInfo
|
|
||||||
from parso.utils import split_lines
|
|
||||||
from parso.python.tokenize import Token
|
|
||||||
from parso import parser
|
|
||||||
from parso.tree import TypedLeaf, ErrorNode, ErrorLeaf
|
|
||||||
|
|
||||||
version36 = PythonVersionInfo(3, 6)
|
|
||||||
|
|
||||||
|
|
||||||
class TokenNamespace:
|
|
||||||
_c = count()
|
|
||||||
LBRACE = next(_c)
|
|
||||||
RBRACE = next(_c)
|
|
||||||
ENDMARKER = next(_c)
|
|
||||||
COLON = next(_c)
|
|
||||||
CONVERSION = next(_c)
|
|
||||||
PYTHON_EXPR = next(_c)
|
|
||||||
EXCLAMATION_MARK = next(_c)
|
|
||||||
UNTERMINATED_STRING = next(_c)
|
|
||||||
|
|
||||||
token_map = dict((v, k) for k, v in locals().items() if not k.startswith('_'))
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def generate_token_id(cls, string):
|
|
||||||
if string == '{':
|
|
||||||
return cls.LBRACE
|
|
||||||
elif string == '}':
|
|
||||||
return cls.RBRACE
|
|
||||||
elif string == '!':
|
|
||||||
return cls.EXCLAMATION_MARK
|
|
||||||
elif string == ':':
|
|
||||||
return cls.COLON
|
|
||||||
return getattr(cls, string)
|
|
||||||
|
|
||||||
|
|
||||||
GRAMMAR = """
|
|
||||||
fstring: expression* ENDMARKER
|
|
||||||
format_spec: ':' expression*
|
|
||||||
expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ format_spec ] '}'
|
|
||||||
"""
|
|
||||||
|
|
||||||
_prefix = r'((?:[^{}]+)*)'
|
|
||||||
_expr = _prefix + r'(\{|\}|$)'
|
|
||||||
_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
|
|
||||||
# There's only one conversion character allowed. But the rules have to be
|
|
||||||
# checked later anyway, so allow more here. This makes error recovery nicer.
|
|
||||||
_conversion = r'([^={}:]*)(.?)'
|
|
||||||
|
|
||||||
_compiled_expr = re.compile(_expr)
|
|
||||||
_compiled_in_expr = re.compile(_in_expr)
|
|
||||||
_compiled_conversion = re.compile(_conversion)
|
|
||||||
|
|
||||||
|
|
||||||
def tokenize(code, start_pos=(1, 0)):
|
|
||||||
def add_to_pos(string):
|
|
||||||
lines = split_lines(string)
|
|
||||||
l = len(lines[-1])
|
|
||||||
if len(lines) > 1:
|
|
||||||
start_pos[0] += len(lines) - 1
|
|
||||||
start_pos[1] = l
|
|
||||||
else:
|
|
||||||
start_pos[1] += l
|
|
||||||
|
|
||||||
def tok(value, type=None, prefix=''):
|
|
||||||
if type is None:
|
|
||||||
type = TokenNamespace.generate_token_id(value)
|
|
||||||
|
|
||||||
add_to_pos(prefix)
|
|
||||||
token = Token(type, value, tuple(start_pos), prefix)
|
|
||||||
add_to_pos(value)
|
|
||||||
return token
|
|
||||||
|
|
||||||
start = 0
|
|
||||||
recursion_level = 0
|
|
||||||
added_prefix = ''
|
|
||||||
start_pos = list(start_pos)
|
|
||||||
while True:
|
|
||||||
match = _compiled_expr.match(code, start)
|
|
||||||
prefix = added_prefix + match.group(1)
|
|
||||||
found = match.group(2)
|
|
||||||
start = match.end()
|
|
||||||
if not found:
|
|
||||||
# We're at the end.
|
|
||||||
break
|
|
||||||
|
|
||||||
if found == '}':
|
|
||||||
if recursion_level == 0 and len(code) > start and code[start] == '}':
|
|
||||||
# This is a }} escape.
|
|
||||||
added_prefix = prefix + '}}'
|
|
||||||
start += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
recursion_level = max(0, recursion_level - 1)
|
|
||||||
yield tok(found, prefix=prefix)
|
|
||||||
added_prefix = ''
|
|
||||||
else:
|
|
||||||
assert found == '{'
|
|
||||||
if recursion_level == 0 and len(code) > start and code[start] == '{':
|
|
||||||
# This is a {{ escape.
|
|
||||||
added_prefix = prefix + '{{'
|
|
||||||
start += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
recursion_level += 1
|
|
||||||
yield tok(found, prefix=prefix)
|
|
||||||
added_prefix = ''
|
|
||||||
|
|
||||||
expression = ''
|
|
||||||
squared_count = 0
|
|
||||||
curly_count = 0
|
|
||||||
while True:
|
|
||||||
expr_match = _compiled_in_expr.match(code, start)
|
|
||||||
expression += expr_match.group(1)
|
|
||||||
found = expr_match.group(2)
|
|
||||||
start = expr_match.end()
|
|
||||||
|
|
||||||
if found == '{':
|
|
||||||
curly_count += 1
|
|
||||||
expression += found
|
|
||||||
elif found == '}' and curly_count > 0:
|
|
||||||
curly_count -= 1
|
|
||||||
expression += found
|
|
||||||
elif found == '[':
|
|
||||||
squared_count += 1
|
|
||||||
expression += found
|
|
||||||
elif found == ']':
|
|
||||||
# Use a max function here, because the Python code might
|
|
||||||
# just have syntax errors.
|
|
||||||
squared_count = max(0, squared_count - 1)
|
|
||||||
expression += found
|
|
||||||
elif found == ':' and (squared_count or curly_count):
|
|
||||||
expression += found
|
|
||||||
elif found in ('"', "'"):
|
|
||||||
search = found
|
|
||||||
if len(code) > start + 1 and \
|
|
||||||
code[start] == found == code[start+1]:
|
|
||||||
search *= 3
|
|
||||||
start += 2
|
|
||||||
|
|
||||||
index = code.find(search, start)
|
|
||||||
if index == -1:
|
|
||||||
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
|
|
||||||
yield tok(
|
|
||||||
found + code[start:],
|
|
||||||
type=TokenNamespace.UNTERMINATED_STRING,
|
|
||||||
)
|
|
||||||
start = len(code)
|
|
||||||
break
|
|
||||||
expression += found + code[start:index+1]
|
|
||||||
start = index + 1
|
|
||||||
elif found == '!' and len(code) > start and code[start] == '=':
|
|
||||||
# This is a python `!=` and not a conversion.
|
|
||||||
expression += found
|
|
||||||
else:
|
|
||||||
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
|
|
||||||
if found:
|
|
||||||
yield tok(found)
|
|
||||||
break
|
|
||||||
|
|
||||||
if found == '!':
|
|
||||||
conversion_match = _compiled_conversion.match(code, start)
|
|
||||||
found = conversion_match.group(2)
|
|
||||||
start = conversion_match.end()
|
|
||||||
yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
|
|
||||||
if found:
|
|
||||||
yield tok(found)
|
|
||||||
if found == '}':
|
|
||||||
recursion_level -= 1
|
|
||||||
|
|
||||||
# We don't need to handle everything after ':', because that is
|
|
||||||
# basically new tokens.
|
|
||||||
|
|
||||||
yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
|
|
||||||
|
|
||||||
|
|
||||||
class Parser(parser.BaseParser):
|
|
||||||
def parse(self, tokens):
|
|
||||||
node = super(Parser, self).parse(tokens)
|
|
||||||
if isinstance(node, self.default_leaf): # Is an endmarker.
|
|
||||||
# If there's no curly braces we get back a non-module. We always
|
|
||||||
# want an fstring.
|
|
||||||
node = self.default_node('fstring', [node])
|
|
||||||
|
|
||||||
return node
|
|
||||||
|
|
||||||
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
|
|
||||||
# TODO this is so ugly.
|
|
||||||
leaf_type = TokenNamespace.token_map[type].lower()
|
|
||||||
return TypedLeaf(leaf_type, value, start_pos, prefix)
|
|
||||||
|
|
||||||
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
|
||||||
add_token_callback):
|
|
||||||
if not self._error_recovery:
|
|
||||||
return super(Parser, self).error_recovery(
|
|
||||||
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
|
||||||
add_token_callback
|
|
||||||
)
|
|
||||||
|
|
||||||
token_type = TokenNamespace.token_map[typ].lower()
|
|
||||||
if len(stack) == 1:
|
|
||||||
error_leaf = ErrorLeaf(token_type, value, start_pos, prefix)
|
|
||||||
stack[0][2][1].append(error_leaf)
|
|
||||||
else:
|
|
||||||
dfa, state, (type_, nodes) = stack[1]
|
|
||||||
stack[0][2][1].append(ErrorNode(nodes))
|
|
||||||
stack[1:] = []
|
|
||||||
|
|
||||||
add_token_callback(typ, value, start_pos, prefix)
|
|
||||||
@@ -119,7 +119,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
|
|||||||
'[' [listmaker] ']' |
|
'[' [listmaker] ']' |
|
||||||
'{' [dictorsetmaker] '}' |
|
'{' [dictorsetmaker] '}' |
|
||||||
'`' testlist1 '`' |
|
'`' testlist1 '`' |
|
||||||
NAME | NUMBER | STRING+)
|
NAME | NUMBER | strings)
|
||||||
|
strings: STRING+
|
||||||
listmaker: test ( list_for | (',' test)* [','] )
|
listmaker: test ( list_for | (',' test)* [','] )
|
||||||
# Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the
|
# Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the
|
||||||
# default. It's more consistent like this.
|
# default. It's more consistent like this.
|
||||||
|
|||||||
@@ -104,7 +104,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
|
|||||||
'[' [listmaker] ']' |
|
'[' [listmaker] ']' |
|
||||||
'{' [dictorsetmaker] '}' |
|
'{' [dictorsetmaker] '}' |
|
||||||
'`' testlist1 '`' |
|
'`' testlist1 '`' |
|
||||||
NAME | NUMBER | STRING+)
|
NAME | NUMBER | strings)
|
||||||
|
strings: STRING+
|
||||||
listmaker: test ( list_for | (',' test)* [','] )
|
listmaker: test ( list_for | (',' test)* [','] )
|
||||||
testlist_comp: test ( comp_for | (',' test)* [','] )
|
testlist_comp: test ( comp_for | (',' test)* [','] )
|
||||||
lambdef: 'lambda' [varargslist] ':' test
|
lambdef: 'lambda' [varargslist] ':' test
|
||||||
|
|||||||
@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
|
|||||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
'[' [testlist_comp] ']' |
|
'[' [testlist_comp] ']' |
|
||||||
'{' [dictorsetmaker] '}' |
|
'{' [dictorsetmaker] '}' |
|
||||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||||
|
strings: STRING+
|
||||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
subscriptlist: subscript (',' subscript)* [',']
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
|
|||||||
@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
|
|||||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
'[' [testlist_comp] ']' |
|
'[' [testlist_comp] ']' |
|
||||||
'{' [dictorsetmaker] '}' |
|
'{' [dictorsetmaker] '}' |
|
||||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||||
|
strings: STRING+
|
||||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
subscriptlist: subscript (',' subscript)* [',']
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
|
|||||||
@@ -110,7 +110,8 @@ atom_expr: ['await'] atom trailer*
|
|||||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
'[' [testlist_comp] ']' |
|
'[' [testlist_comp] ']' |
|
||||||
'{' [dictorsetmaker] '}' |
|
'{' [dictorsetmaker] '}' |
|
||||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||||
|
strings: STRING+
|
||||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
subscriptlist: subscript (',' subscript)* [',']
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ atom_expr: ['await'] atom trailer*
|
|||||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
'[' [testlist_comp] ']' |
|
'[' [testlist_comp] ']' |
|
||||||
'{' [dictorsetmaker] '}' |
|
'{' [dictorsetmaker] '}' |
|
||||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
subscriptlist: subscript (',' subscript)* [',']
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
@@ -148,3 +148,10 @@ encoding_decl: NAME
|
|||||||
|
|
||||||
yield_expr: 'yield' [yield_arg]
|
yield_expr: 'yield' [yield_arg]
|
||||||
yield_arg: 'from' test | testlist
|
yield_arg: 'from' test | testlist
|
||||||
|
|
||||||
|
strings: (STRING | fstring)+
|
||||||
|
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||||
|
fstring_content: FSTRING_STRING | fstring_expr
|
||||||
|
fstring_conversion: '!' NAME
|
||||||
|
fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||||
|
fstring_format_spec: ':' fstring_content*
|
||||||
|
|||||||
@@ -15,8 +15,6 @@ decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
|||||||
decorators: decorator+
|
decorators: decorator+
|
||||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||||
|
|
||||||
# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of
|
|
||||||
# skipping python3.5+ compatibility, in favour of 3.7 solution
|
|
||||||
async_funcdef: 'async' funcdef
|
async_funcdef: 'async' funcdef
|
||||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||||
|
|
||||||
@@ -108,7 +106,7 @@ atom_expr: ['await'] atom trailer*
|
|||||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
'[' [testlist_comp] ']' |
|
'[' [testlist_comp] ']' |
|
||||||
'{' [dictorsetmaker] '}' |
|
'{' [dictorsetmaker] '}' |
|
||||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
subscriptlist: subscript (',' subscript)* [',']
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
@@ -148,3 +146,10 @@ encoding_decl: NAME
|
|||||||
|
|
||||||
yield_expr: 'yield' [yield_arg]
|
yield_expr: 'yield' [yield_arg]
|
||||||
yield_arg: 'from' test | testlist
|
yield_arg: 'from' test | testlist
|
||||||
|
|
||||||
|
strings: (STRING | fstring)+
|
||||||
|
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||||
|
fstring_content: FSTRING_STRING | fstring_expr
|
||||||
|
fstring_conversion: '!' NAME
|
||||||
|
fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||||
|
fstring_format_spec: ':' fstring_content*
|
||||||
|
|||||||
157
parso/python/grammar38.txt
Normal file
157
parso/python/grammar38.txt
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
# Grammar for Python
|
||||||
|
|
||||||
|
# NOTE WELL: You should also follow all the steps listed at
|
||||||
|
# https://devguide.python.org/grammar/
|
||||||
|
|
||||||
|
# Start symbols for the grammar:
|
||||||
|
# single_input is a single interactive statement;
|
||||||
|
# file_input is a module or sequence of commands read from an input file;
|
||||||
|
# eval_input is the input for the eval() functions.
|
||||||
|
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||||
|
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||||
|
file_input: (NEWLINE | stmt)* ENDMARKER
|
||||||
|
eval_input: testlist NEWLINE* ENDMARKER
|
||||||
|
|
||||||
|
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||||
|
decorators: decorator+
|
||||||
|
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||||
|
|
||||||
|
async_funcdef: 'async' funcdef
|
||||||
|
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||||
|
|
||||||
|
parameters: '(' [typedargslist] ')'
|
||||||
|
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
|
||||||
|
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||||
|
| '**' tfpdef [',']]]
|
||||||
|
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||||
|
| '**' tfpdef [','])
|
||||||
|
tfpdef: NAME [':' test]
|
||||||
|
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||||
|
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||||
|
| '**' vfpdef [',']]]
|
||||||
|
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||||
|
| '**' vfpdef [',']
|
||||||
|
)
|
||||||
|
vfpdef: NAME
|
||||||
|
|
||||||
|
stmt: simple_stmt | compound_stmt
|
||||||
|
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||||
|
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||||
|
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||||
|
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
|
||||||
|
('=' (yield_expr|testlist_star_expr))*)
|
||||||
|
annassign: ':' test ['=' test]
|
||||||
|
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||||
|
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||||
|
'<<=' | '>>=' | '**=' | '//=')
|
||||||
|
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
||||||
|
del_stmt: 'del' exprlist
|
||||||
|
pass_stmt: 'pass'
|
||||||
|
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||||
|
break_stmt: 'break'
|
||||||
|
continue_stmt: 'continue'
|
||||||
|
return_stmt: 'return' [testlist_star_expr]
|
||||||
|
yield_stmt: yield_expr
|
||||||
|
raise_stmt: 'raise' [test ['from' test]]
|
||||||
|
import_stmt: import_name | import_from
|
||||||
|
import_name: 'import' dotted_as_names
|
||||||
|
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||||
|
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||||
|
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||||
|
import_as_name: NAME ['as' NAME]
|
||||||
|
dotted_as_name: dotted_name ['as' NAME]
|
||||||
|
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||||
|
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||||
|
dotted_name: NAME ('.' NAME)*
|
||||||
|
global_stmt: 'global' NAME (',' NAME)*
|
||||||
|
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||||
|
assert_stmt: 'assert' test [',' test]
|
||||||
|
|
||||||
|
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||||
|
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||||
|
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||||
|
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||||
|
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||||
|
try_stmt: ('try' ':' suite
|
||||||
|
((except_clause ':' suite)+
|
||||||
|
['else' ':' suite]
|
||||||
|
['finally' ':' suite] |
|
||||||
|
'finally' ':' suite))
|
||||||
|
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||||
|
with_item: test ['as' expr]
|
||||||
|
# NB compile.c makes sure that the default except clause is last
|
||||||
|
except_clause: 'except' [test ['as' NAME]]
|
||||||
|
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||||
|
|
||||||
|
test: or_test ['if' or_test 'else' test] | lambdef
|
||||||
|
test_nocond: or_test | lambdef_nocond
|
||||||
|
lambdef: 'lambda' [varargslist] ':' test
|
||||||
|
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||||
|
or_test: and_test ('or' and_test)*
|
||||||
|
and_test: not_test ('and' not_test)*
|
||||||
|
not_test: 'not' not_test | comparison
|
||||||
|
comparison: expr (comp_op expr)*
|
||||||
|
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||||
|
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||||
|
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||||
|
star_expr: '*' expr
|
||||||
|
expr: xor_expr ('|' xor_expr)*
|
||||||
|
xor_expr: and_expr ('^' and_expr)*
|
||||||
|
and_expr: shift_expr ('&' shift_expr)*
|
||||||
|
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||||
|
arith_expr: term (('+'|'-') term)*
|
||||||
|
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||||
|
factor: ('+'|'-'|'~') factor | power
|
||||||
|
power: atom_expr ['**' factor]
|
||||||
|
atom_expr: ['await'] atom trailer*
|
||||||
|
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||||
|
'[' [testlist_comp] ']' |
|
||||||
|
'{' [dictorsetmaker] '}' |
|
||||||
|
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||||
|
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||||
|
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||||
|
subscriptlist: subscript (',' subscript)* [',']
|
||||||
|
subscript: test | [test] ':' [test] [sliceop]
|
||||||
|
sliceop: ':' [test]
|
||||||
|
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||||
|
testlist: test (',' test)* [',']
|
||||||
|
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||||
|
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||||
|
((test | star_expr)
|
||||||
|
(comp_for | (',' (test | star_expr))* [','])) )
|
||||||
|
|
||||||
|
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||||
|
|
||||||
|
arglist: argument (',' argument)* [',']
|
||||||
|
|
||||||
|
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||||
|
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||||
|
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||||
|
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||||
|
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||||
|
# we explicitly match '*' here, too, to give it proper precedence.
|
||||||
|
# Illegal combinations and orderings are blocked in ast.c:
|
||||||
|
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||||
|
# that precede iterable unpackings are blocked; etc.
|
||||||
|
argument: ( test [comp_for] |
|
||||||
|
test '=' test |
|
||||||
|
'**' test |
|
||||||
|
'*' test )
|
||||||
|
|
||||||
|
comp_iter: comp_for | comp_if
|
||||||
|
sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
|
||||||
|
comp_for: ['async'] sync_comp_for
|
||||||
|
comp_if: 'if' test_nocond [comp_iter]
|
||||||
|
|
||||||
|
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||||
|
encoding_decl: NAME
|
||||||
|
|
||||||
|
yield_expr: 'yield' [yield_arg]
|
||||||
|
yield_arg: 'from' test | testlist_star_expr
|
||||||
|
|
||||||
|
strings: (STRING | fstring)+
|
||||||
|
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||||
|
fstring_content: FSTRING_STRING | fstring_expr
|
||||||
|
fstring_conversion: '!' NAME
|
||||||
|
fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||||
|
fstring_format_spec: ':' fstring_content*
|
||||||
@@ -1,8 +1,11 @@
|
|||||||
from parso.python import tree
|
from parso.python import tree
|
||||||
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
|
from parso.python.token import PythonTokenTypes
|
||||||
STRING, tok_name, NAME)
|
|
||||||
from parso.parser import BaseParser
|
from parso.parser import BaseParser
|
||||||
from parso.pgen2.parse import token_to_ilabel
|
|
||||||
|
|
||||||
|
NAME = PythonTokenTypes.NAME
|
||||||
|
INDENT = PythonTokenTypes.INDENT
|
||||||
|
DEDENT = PythonTokenTypes.DEDENT
|
||||||
|
|
||||||
|
|
||||||
class Parser(BaseParser):
|
class Parser(BaseParser):
|
||||||
@@ -50,44 +53,35 @@ class Parser(BaseParser):
|
|||||||
}
|
}
|
||||||
default_node = tree.PythonNode
|
default_node = tree.PythonNode
|
||||||
|
|
||||||
def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
|
# Names/Keywords are handled separately
|
||||||
super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
|
_leaf_map = {
|
||||||
|
PythonTokenTypes.STRING: tree.String,
|
||||||
|
PythonTokenTypes.NUMBER: tree.Number,
|
||||||
|
PythonTokenTypes.NEWLINE: tree.Newline,
|
||||||
|
PythonTokenTypes.ENDMARKER: tree.EndMarker,
|
||||||
|
PythonTokenTypes.FSTRING_STRING: tree.FStringString,
|
||||||
|
PythonTokenTypes.FSTRING_START: tree.FStringStart,
|
||||||
|
PythonTokenTypes.FSTRING_END: tree.FStringEnd,
|
||||||
|
}
|
||||||
|
|
||||||
|
def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
|
||||||
|
super(Parser, self).__init__(pgen_grammar, start_nonterminal,
|
||||||
|
error_recovery=error_recovery)
|
||||||
|
|
||||||
self.syntax_errors = []
|
self.syntax_errors = []
|
||||||
self._omit_dedent_list = []
|
self._omit_dedent_list = []
|
||||||
self._indent_counter = 0
|
self._indent_counter = 0
|
||||||
|
|
||||||
# TODO do print absolute import detection here.
|
|
||||||
# try:
|
|
||||||
# del python_grammar_no_print_statement.keywords["print"]
|
|
||||||
# except KeyError:
|
|
||||||
# pass # Doesn't exist in the Python 3 grammar.
|
|
||||||
|
|
||||||
# if self.options["print_function"]:
|
|
||||||
# python_grammar = pygram.python_grammar_no_print_statement
|
|
||||||
# else:
|
|
||||||
|
|
||||||
def parse(self, tokens):
|
def parse(self, tokens):
|
||||||
if self._error_recovery:
|
if self._error_recovery:
|
||||||
if self._start_symbol != 'file_input':
|
if self._start_nonterminal != 'file_input':
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
tokens = self._recovery_tokenize(tokens)
|
tokens = self._recovery_tokenize(tokens)
|
||||||
|
|
||||||
node = super(Parser, self).parse(tokens)
|
return super(Parser, self).parse(tokens)
|
||||||
|
|
||||||
if self._start_symbol == 'file_input' != node.type:
|
def convert_node(self, nonterminal, children):
|
||||||
# If there's only one statement, we get back a non-module. That's
|
|
||||||
# not what we want, we want a module, so we add it here:
|
|
||||||
node = self.convert_node(
|
|
||||||
self._pgen_grammar,
|
|
||||||
self._pgen_grammar.symbol2number['file_input'],
|
|
||||||
[node]
|
|
||||||
)
|
|
||||||
|
|
||||||
return node
|
|
||||||
|
|
||||||
def convert_node(self, pgen_grammar, type, children):
|
|
||||||
"""
|
"""
|
||||||
Convert raw node information to a PythonBaseNode instance.
|
Convert raw node information to a PythonBaseNode instance.
|
||||||
|
|
||||||
@@ -95,158 +89,121 @@ class Parser(BaseParser):
|
|||||||
grammar rule produces a new complete node, so that the tree is build
|
grammar rule produces a new complete node, so that the tree is build
|
||||||
strictly bottom-up.
|
strictly bottom-up.
|
||||||
"""
|
"""
|
||||||
# TODO REMOVE symbol, we don't want type here.
|
|
||||||
symbol = pgen_grammar.number2symbol[type]
|
|
||||||
try:
|
try:
|
||||||
return self.node_map[symbol](children)
|
node = self.node_map[nonterminal](children)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
if symbol == 'suite':
|
if nonterminal == 'suite':
|
||||||
# We don't want the INDENT/DEDENT in our parser tree. Those
|
# We don't want the INDENT/DEDENT in our parser tree. Those
|
||||||
# leaves are just cancer. They are virtual leaves and not real
|
# leaves are just cancer. They are virtual leaves and not real
|
||||||
# ones and therefore have pseudo start/end positions and no
|
# ones and therefore have pseudo start/end positions and no
|
||||||
# prefixes. Just ignore them.
|
# prefixes. Just ignore them.
|
||||||
children = [children[0]] + children[2:-1]
|
children = [children[0]] + children[2:-1]
|
||||||
elif symbol == 'list_if':
|
elif nonterminal == 'list_if':
|
||||||
# Make transitioning from 2 to 3 easier.
|
# Make transitioning from 2 to 3 easier.
|
||||||
symbol = 'comp_if'
|
nonterminal = 'comp_if'
|
||||||
elif symbol == 'listmaker':
|
elif nonterminal == 'listmaker':
|
||||||
# Same as list_if above.
|
# Same as list_if above.
|
||||||
symbol = 'testlist_comp'
|
nonterminal = 'testlist_comp'
|
||||||
return self.default_node(symbol, children)
|
node = self.default_node(nonterminal, children)
|
||||||
|
for c in children:
|
||||||
|
c.parent = node
|
||||||
|
return node
|
||||||
|
|
||||||
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
|
def convert_leaf(self, type, value, prefix, start_pos):
|
||||||
# print('leaf', repr(value), token.tok_name[type])
|
# print('leaf', repr(value), token.tok_name[type])
|
||||||
if type == NAME:
|
if type == NAME:
|
||||||
if value in pgen_grammar.keywords:
|
if value in self._pgen_grammar.reserved_syntax_strings:
|
||||||
return tree.Keyword(value, start_pos, prefix)
|
return tree.Keyword(value, start_pos, prefix)
|
||||||
else:
|
else:
|
||||||
return tree.Name(value, start_pos, prefix)
|
return tree.Name(value, start_pos, prefix)
|
||||||
elif type == STRING:
|
|
||||||
return tree.String(value, start_pos, prefix)
|
|
||||||
elif type == NUMBER:
|
|
||||||
return tree.Number(value, start_pos, prefix)
|
|
||||||
elif type == NEWLINE:
|
|
||||||
return tree.Newline(value, start_pos, prefix)
|
|
||||||
elif type == ENDMARKER:
|
|
||||||
return tree.EndMarker(value, start_pos, prefix)
|
|
||||||
else:
|
|
||||||
return tree.Operator(value, start_pos, prefix)
|
|
||||||
|
|
||||||
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
|
||||||
add_token_callback):
|
|
||||||
def get_symbol_and_nodes(stack):
|
|
||||||
for dfa, state, (type_, nodes) in stack:
|
|
||||||
symbol = pgen_grammar.number2symbol[type_]
|
|
||||||
yield symbol, nodes
|
|
||||||
|
|
||||||
tos_nodes = stack.get_tos_nodes()
|
def error_recovery(self, token):
|
||||||
|
tos_nodes = self.stack[-1].nodes
|
||||||
if tos_nodes:
|
if tos_nodes:
|
||||||
last_leaf = tos_nodes[-1].get_last_leaf()
|
last_leaf = tos_nodes[-1].get_last_leaf()
|
||||||
else:
|
else:
|
||||||
last_leaf = None
|
last_leaf = None
|
||||||
|
|
||||||
if self._start_symbol == 'file_input' and \
|
if self._start_nonterminal == 'file_input' and \
|
||||||
(typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
|
(token.type == PythonTokenTypes.ENDMARKER
|
||||||
def reduce_stack(states, newstate):
|
or token.type == DEDENT and '\n' not in last_leaf.value
|
||||||
# reduce
|
and '\r' not in last_leaf.value):
|
||||||
state = newstate
|
|
||||||
while states[state] == [(0, state)]:
|
|
||||||
self.pgen_parser._pop()
|
|
||||||
|
|
||||||
dfa, state, (type_, nodes) = stack[-1]
|
|
||||||
states, first = dfa
|
|
||||||
|
|
||||||
|
|
||||||
# In Python statements need to end with a newline. But since it's
|
# In Python statements need to end with a newline. But since it's
|
||||||
# possible (and valid in Python ) that there's no newline at the
|
# possible (and valid in Python ) that there's no newline at the
|
||||||
# end of a file, we have to recover even if the user doesn't want
|
# end of a file, we have to recover even if the user doesn't want
|
||||||
# error recovery.
|
# error recovery.
|
||||||
#print('x', pprint.pprint(stack))
|
if self.stack[-1].dfa.from_rule == 'simple_stmt':
|
||||||
ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
|
try:
|
||||||
|
plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
|
||||||
dfa, state, (type_, nodes) = stack[-1]
|
except KeyError:
|
||||||
symbol = pgen_grammar.number2symbol[type_]
|
pass
|
||||||
states, first = dfa
|
else:
|
||||||
arcs = states[state]
|
if plan.next_dfa.is_final and not plan.dfa_pushes:
|
||||||
# Look for a state with this label
|
# We are ignoring here that the newline would be
|
||||||
for i, newstate in arcs:
|
# required for a simple_stmt.
|
||||||
if ilabel == i:
|
self.stack[-1].dfa = plan.next_dfa
|
||||||
if symbol == 'simple_stmt':
|
self._add_token(token)
|
||||||
# This is basically shifting
|
|
||||||
stack[-1] = (dfa, newstate, (type_, nodes))
|
|
||||||
|
|
||||||
reduce_stack(states, newstate)
|
|
||||||
add_token_callback(typ, value, start_pos, prefix)
|
|
||||||
return
|
return
|
||||||
# Check if we're at the right point
|
|
||||||
#for symbol, nodes in get_symbol_and_nodes(stack):
|
|
||||||
# self.pgen_parser._pop()
|
|
||||||
|
|
||||||
#break
|
|
||||||
break
|
|
||||||
#symbol = pgen_grammar.number2symbol[type_]
|
|
||||||
|
|
||||||
if not self._error_recovery:
|
if not self._error_recovery:
|
||||||
return super(Parser, self).error_recovery(
|
return super(Parser, self).error_recovery(token)
|
||||||
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
|
||||||
add_token_callback)
|
|
||||||
|
|
||||||
def current_suite(stack):
|
def current_suite(stack):
|
||||||
# For now just discard everything that is not a suite or
|
# For now just discard everything that is not a suite or
|
||||||
# file_input, if we detect an error.
|
# file_input, if we detect an error.
|
||||||
for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
|
for until_index, stack_node in reversed(list(enumerate(stack))):
|
||||||
# `suite` can sometimes be only simple_stmt, not stmt.
|
# `suite` can sometimes be only simple_stmt, not stmt.
|
||||||
if symbol == 'file_input':
|
if stack_node.nonterminal == 'file_input':
|
||||||
break
|
break
|
||||||
elif symbol == 'suite' and len(nodes) > 1:
|
elif stack_node.nonterminal == 'suite':
|
||||||
# suites without an indent in them get discarded.
|
# In the case where we just have a newline we don't want to
|
||||||
|
# do error recovery here. In all other cases, we want to do
|
||||||
|
# error recovery.
|
||||||
|
if len(stack_node.nodes) != 1:
|
||||||
break
|
break
|
||||||
return index, symbol, nodes
|
return until_index
|
||||||
|
|
||||||
index, symbol, nodes = current_suite(stack)
|
until_index = current_suite(self.stack)
|
||||||
|
|
||||||
# print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
|
if self._stack_removal(until_index + 1):
|
||||||
if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
|
self._add_token(token)
|
||||||
add_token_callback(typ, value, start_pos, prefix)
|
|
||||||
else:
|
else:
|
||||||
|
typ, value, start_pos, prefix = token
|
||||||
if typ == INDENT:
|
if typ == INDENT:
|
||||||
# For every deleted INDENT we have to delete a DEDENT as well.
|
# For every deleted INDENT we have to delete a DEDENT as well.
|
||||||
# Otherwise the parser will get into trouble and DEDENT too early.
|
# Otherwise the parser will get into trouble and DEDENT too early.
|
||||||
self._omit_dedent_list.append(self._indent_counter)
|
self._omit_dedent_list.append(self._indent_counter)
|
||||||
|
|
||||||
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
|
error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
|
||||||
stack[-1][2][1].append(error_leaf)
|
self.stack[-1].nodes.append(error_leaf)
|
||||||
|
|
||||||
if symbol == 'suite':
|
tos = self.stack[-1]
|
||||||
dfa, state, node = stack[-1]
|
if tos.nonterminal == 'suite':
|
||||||
states, first = dfa
|
# Need at least one statement in the suite. This happend with the
|
||||||
arcs = states[state]
|
# error recovery above.
|
||||||
intended_label = pgen_grammar.symbol2label['stmt']
|
try:
|
||||||
# Introduce a proper state transition. We're basically allowing
|
tos.dfa = tos.dfa.arcs['stmt']
|
||||||
# there to be no valid statements inside a suite.
|
except KeyError:
|
||||||
if [x[0] for x in arcs] == [intended_label]:
|
# We're already in a final state.
|
||||||
new_state = arcs[0][1]
|
pass
|
||||||
stack[-1] = dfa, new_state, node
|
|
||||||
|
|
||||||
def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos):
|
def _stack_removal(self, start_index):
|
||||||
failed_stack = False
|
all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]
|
||||||
found = False
|
|
||||||
all_nodes = []
|
|
||||||
for dfa, state, (type_, nodes) in stack[start_index:]:
|
|
||||||
if nodes:
|
|
||||||
found = True
|
|
||||||
if found:
|
|
||||||
failed_stack = True
|
|
||||||
all_nodes += nodes
|
|
||||||
if failed_stack:
|
|
||||||
stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes))
|
|
||||||
|
|
||||||
stack[start_index:] = []
|
if all_nodes:
|
||||||
return failed_stack
|
node = tree.PythonErrorNode(all_nodes)
|
||||||
|
for n in all_nodes:
|
||||||
|
n.parent = node
|
||||||
|
self.stack[start_index - 1].nodes.append(node)
|
||||||
|
|
||||||
|
self.stack[start_index:] = []
|
||||||
|
return bool(all_nodes)
|
||||||
|
|
||||||
def _recovery_tokenize(self, tokens):
|
def _recovery_tokenize(self, tokens):
|
||||||
for typ, value, start_pos, prefix in tokens:
|
for token in tokens:
|
||||||
# print(tok_name[typ], repr(value), start_pos, repr(prefix))
|
typ = token[0]
|
||||||
if typ == DEDENT:
|
if typ == DEDENT:
|
||||||
# We need to count indents, because if we just omit any DEDENT,
|
# We need to count indents, because if we just omit any DEDENT,
|
||||||
# we might omit them in the wrong place.
|
# we might omit them in the wrong place.
|
||||||
@@ -258,4 +215,4 @@ class Parser(BaseParser):
|
|||||||
self._indent_counter -= 1
|
self._indent_counter -= 1
|
||||||
elif typ == INDENT:
|
elif typ == INDENT:
|
||||||
self._indent_counter += 1
|
self._indent_counter += 1
|
||||||
yield typ, value, start_pos, prefix
|
yield token
|
||||||
|
|||||||
@@ -391,11 +391,11 @@ class PEP8Normalizer(ErrorFinder):
|
|||||||
if value.lstrip('#'):
|
if value.lstrip('#'):
|
||||||
self.add_issue(part, 266, "Too many leading '#' for block comment.")
|
self.add_issue(part, 266, "Too many leading '#' for block comment.")
|
||||||
elif self._on_newline:
|
elif self._on_newline:
|
||||||
if not re.match('#:? ', value) and not value == '#' \
|
if not re.match(r'#:? ', value) and not value == '#' \
|
||||||
and not (value.startswith('#!') and part.start_pos == (1, 0)):
|
and not (value.startswith('#!') and part.start_pos == (1, 0)):
|
||||||
self.add_issue(part, 265, "Block comment should start with '# '")
|
self.add_issue(part, 265, "Block comment should start with '# '")
|
||||||
else:
|
else:
|
||||||
if not re.match('#:? [^ ]', value):
|
if not re.match(r'#:? [^ ]', value):
|
||||||
self.add_issue(part, 262, "Inline comment should start with '# '")
|
self.add_issue(part, 262, "Inline comment should start with '# '")
|
||||||
|
|
||||||
self._reset_newlines(spacing, leaf, is_comment=True)
|
self._reset_newlines(spacing, leaf, is_comment=True)
|
||||||
@@ -677,7 +677,7 @@ class PEP8Normalizer(ErrorFinder):
|
|||||||
elif typ == 'string':
|
elif typ == 'string':
|
||||||
# Checking multiline strings
|
# Checking multiline strings
|
||||||
for i, line in enumerate(leaf.value.splitlines()[1:]):
|
for i, line in enumerate(leaf.value.splitlines()[1:]):
|
||||||
indentation = re.match('[ \t]*', line).group(0)
|
indentation = re.match(r'[ \t]*', line).group(0)
|
||||||
start_pos = leaf.line + i, len(indentation)
|
start_pos = leaf.line + i, len(indentation)
|
||||||
# TODO check multiline indentation.
|
# TODO check multiline indentation.
|
||||||
elif typ == 'endmarker':
|
elif typ == 'endmarker':
|
||||||
|
|||||||
@@ -1,104 +1,27 @@
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from itertools import count
|
|
||||||
from token import *
|
|
||||||
|
|
||||||
from parso._compatibility import py_version
|
|
||||||
|
|
||||||
|
|
||||||
_counter = count(N_TOKENS)
|
class TokenType(object):
|
||||||
# Never want to see this thing again.
|
def __init__(self, name, contains_syntax=False):
|
||||||
del N_TOKENS
|
self.name = name
|
||||||
|
self.contains_syntax = contains_syntax
|
||||||
|
|
||||||
COMMENT = next(_counter)
|
def __repr__(self):
|
||||||
tok_name[COMMENT] = 'COMMENT'
|
return '%s(%s)' % (self.__class__.__name__, self.name)
|
||||||
|
|
||||||
NL = next(_counter)
|
|
||||||
tok_name[NL] = 'NL'
|
|
||||||
|
|
||||||
# Sets the attributes that don't exist in these tok_name versions.
|
|
||||||
if py_version >= 30:
|
|
||||||
BACKQUOTE = next(_counter)
|
|
||||||
tok_name[BACKQUOTE] = 'BACKQUOTE'
|
|
||||||
else:
|
|
||||||
RARROW = next(_counter)
|
|
||||||
tok_name[RARROW] = 'RARROW'
|
|
||||||
ELLIPSIS = next(_counter)
|
|
||||||
tok_name[ELLIPSIS] = 'ELLIPSIS'
|
|
||||||
|
|
||||||
if py_version < 35:
|
|
||||||
ATEQUAL = next(_counter)
|
|
||||||
tok_name[ATEQUAL] = 'ATEQUAL'
|
|
||||||
|
|
||||||
ERROR_DEDENT = next(_counter)
|
|
||||||
tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
|
|
||||||
|
|
||||||
|
|
||||||
# Map from operator to number (since tokenize doesn't do this)
|
class TokenTypes(object):
|
||||||
|
|
||||||
opmap_raw = """\
|
|
||||||
( LPAR
|
|
||||||
) RPAR
|
|
||||||
[ LSQB
|
|
||||||
] RSQB
|
|
||||||
: COLON
|
|
||||||
, COMMA
|
|
||||||
; SEMI
|
|
||||||
+ PLUS
|
|
||||||
- MINUS
|
|
||||||
* STAR
|
|
||||||
/ SLASH
|
|
||||||
| VBAR
|
|
||||||
& AMPER
|
|
||||||
< LESS
|
|
||||||
> GREATER
|
|
||||||
= EQUAL
|
|
||||||
. DOT
|
|
||||||
% PERCENT
|
|
||||||
` BACKQUOTE
|
|
||||||
{ LBRACE
|
|
||||||
} RBRACE
|
|
||||||
@ AT
|
|
||||||
== EQEQUAL
|
|
||||||
!= NOTEQUAL
|
|
||||||
<> NOTEQUAL
|
|
||||||
<= LESSEQUAL
|
|
||||||
>= GREATEREQUAL
|
|
||||||
~ TILDE
|
|
||||||
^ CIRCUMFLEX
|
|
||||||
<< LEFTSHIFT
|
|
||||||
>> RIGHTSHIFT
|
|
||||||
** DOUBLESTAR
|
|
||||||
+= PLUSEQUAL
|
|
||||||
-= MINEQUAL
|
|
||||||
*= STAREQUAL
|
|
||||||
/= SLASHEQUAL
|
|
||||||
%= PERCENTEQUAL
|
|
||||||
&= AMPEREQUAL
|
|
||||||
|= VBAREQUAL
|
|
||||||
@= ATEQUAL
|
|
||||||
^= CIRCUMFLEXEQUAL
|
|
||||||
<<= LEFTSHIFTEQUAL
|
|
||||||
>>= RIGHTSHIFTEQUAL
|
|
||||||
**= DOUBLESTAREQUAL
|
|
||||||
// DOUBLESLASH
|
|
||||||
//= DOUBLESLASHEQUAL
|
|
||||||
-> RARROW
|
|
||||||
... ELLIPSIS
|
|
||||||
"""
|
|
||||||
|
|
||||||
opmap = {}
|
|
||||||
for line in opmap_raw.splitlines():
|
|
||||||
op, name = line.split()
|
|
||||||
opmap[op] = globals()[name]
|
|
||||||
|
|
||||||
|
|
||||||
def generate_token_id(string):
|
|
||||||
"""
|
"""
|
||||||
Uses a token in the grammar (e.g. `'+'` or `'and'`returns the corresponding
|
Basically an enum, but Python 2 doesn't have enums in the standard library.
|
||||||
ID for it. The strings are part of the grammar file.
|
|
||||||
"""
|
"""
|
||||||
try:
|
def __init__(self, names, contains_syntax):
|
||||||
return opmap[string]
|
for name in names:
|
||||||
except KeyError:
|
setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax))
|
||||||
pass
|
|
||||||
return globals()[string]
|
|
||||||
|
PythonTokenTypes = TokenTypes((
|
||||||
|
'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT',
|
||||||
|
'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP',
|
||||||
|
'ENDMARKER'),
|
||||||
|
contains_syntax=('NAME', 'OP'),
|
||||||
|
)
|
||||||
|
|||||||
@@ -18,16 +18,29 @@ from collections import namedtuple
|
|||||||
import itertools as _itertools
|
import itertools as _itertools
|
||||||
from codecs import BOM_UTF8
|
from codecs import BOM_UTF8
|
||||||
|
|
||||||
from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
|
from parso.python.token import PythonTokenTypes
|
||||||
NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
|
|
||||||
ERROR_DEDENT)
|
|
||||||
from parso._compatibility import py_version
|
from parso._compatibility import py_version
|
||||||
from parso.utils import split_lines
|
from parso.utils import split_lines
|
||||||
|
|
||||||
|
|
||||||
|
STRING = PythonTokenTypes.STRING
|
||||||
|
NAME = PythonTokenTypes.NAME
|
||||||
|
NUMBER = PythonTokenTypes.NUMBER
|
||||||
|
OP = PythonTokenTypes.OP
|
||||||
|
NEWLINE = PythonTokenTypes.NEWLINE
|
||||||
|
INDENT = PythonTokenTypes.INDENT
|
||||||
|
DEDENT = PythonTokenTypes.DEDENT
|
||||||
|
ENDMARKER = PythonTokenTypes.ENDMARKER
|
||||||
|
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
|
||||||
|
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
|
||||||
|
FSTRING_START = PythonTokenTypes.FSTRING_START
|
||||||
|
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
|
||||||
|
FSTRING_END = PythonTokenTypes.FSTRING_END
|
||||||
|
|
||||||
TokenCollection = namedtuple(
|
TokenCollection = namedtuple(
|
||||||
'TokenCollection',
|
'TokenCollection',
|
||||||
'pseudo_token single_quoted triple_quoted endpats always_break_tokens',
|
'pseudo_token single_quoted triple_quoted endpats whitespace '
|
||||||
|
'fstring_pattern_map always_break_tokens',
|
||||||
)
|
)
|
||||||
|
|
||||||
BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
|
BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
|
||||||
@@ -52,32 +65,35 @@ def group(*choices, **kwargs):
|
|||||||
return start + '|'.join(choices) + ')'
|
return start + '|'.join(choices) + ')'
|
||||||
|
|
||||||
|
|
||||||
def any(*choices):
|
|
||||||
return group(*choices) + '*'
|
|
||||||
|
|
||||||
|
|
||||||
def maybe(*choices):
|
def maybe(*choices):
|
||||||
return group(*choices) + '?'
|
return group(*choices) + '?'
|
||||||
|
|
||||||
|
|
||||||
# Return the empty string, plus all of the valid string prefixes.
|
# Return the empty string, plus all of the valid string prefixes.
|
||||||
def _all_string_prefixes(version_info):
|
def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False):
|
||||||
def different_case_versions(prefix):
|
def different_case_versions(prefix):
|
||||||
for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
|
for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
|
||||||
yield ''.join(s)
|
yield ''.join(s)
|
||||||
# The valid string prefixes. Only contain the lower case versions,
|
# The valid string prefixes. Only contain the lower case versions,
|
||||||
# and don't contain any permuations (include 'fr', but not
|
# and don't contain any permuations (include 'fr', but not
|
||||||
# 'rf'). The various permutations will be generated.
|
# 'rf'). The various permutations will be generated.
|
||||||
_valid_string_prefixes = ['b', 'r', 'u']
|
valid_string_prefixes = ['b', 'r', 'u']
|
||||||
if version_info >= (3, 0):
|
if version_info >= (3, 0):
|
||||||
_valid_string_prefixes.append('br')
|
valid_string_prefixes.append('br')
|
||||||
|
|
||||||
if version_info >= (3, 6):
|
result = set([''])
|
||||||
_valid_string_prefixes += ['f', 'fr']
|
if version_info >= (3, 6) and include_fstring:
|
||||||
|
f = ['f', 'fr']
|
||||||
|
if only_fstring:
|
||||||
|
valid_string_prefixes = f
|
||||||
|
result = set()
|
||||||
|
else:
|
||||||
|
valid_string_prefixes += f
|
||||||
|
elif only_fstring:
|
||||||
|
return set()
|
||||||
|
|
||||||
# if we add binary f-strings, add: ['fb', 'fbr']
|
# if we add binary f-strings, add: ['fb', 'fbr']
|
||||||
result = set([''])
|
for prefix in valid_string_prefixes:
|
||||||
for prefix in _valid_string_prefixes:
|
|
||||||
for t in _itertools.permutations(prefix):
|
for t in _itertools.permutations(prefix):
|
||||||
# create a list with upper and lower versions of each
|
# create a list with upper and lower versions of each
|
||||||
# character
|
# character
|
||||||
@@ -102,10 +118,15 @@ def _get_token_collection(version_info):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
|
||||||
|
fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
|
||||||
|
|
||||||
|
|
||||||
def _create_token_collection(version_info):
|
def _create_token_collection(version_info):
|
||||||
# Note: we use unicode matching for names ("\w") but ascii matching for
|
# Note: we use unicode matching for names ("\w") but ascii matching for
|
||||||
# number literals.
|
# number literals.
|
||||||
Whitespace = r'[ \f\t]*'
|
Whitespace = r'[ \f\t]*'
|
||||||
|
whitespace = _compile(Whitespace)
|
||||||
Comment = r'#[^\r\n]*'
|
Comment = r'#[^\r\n]*'
|
||||||
Name = r'\w+'
|
Name = r'\w+'
|
||||||
|
|
||||||
@@ -141,28 +162,31 @@ def _create_token_collection(version_info):
|
|||||||
# StringPrefix can be the empty string (making it optional).
|
# StringPrefix can be the empty string (making it optional).
|
||||||
possible_prefixes = _all_string_prefixes(version_info)
|
possible_prefixes = _all_string_prefixes(version_info)
|
||||||
StringPrefix = group(*possible_prefixes)
|
StringPrefix = group(*possible_prefixes)
|
||||||
|
StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True))
|
||||||
|
fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True)
|
||||||
|
FStringStart = group(*fstring_prefixes)
|
||||||
|
|
||||||
# Tail end of ' string.
|
# Tail end of ' string.
|
||||||
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
|
Single = r"(?:\\.|[^'\\])*'"
|
||||||
# Tail end of " string.
|
# Tail end of " string.
|
||||||
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
|
Double = r'(?:\\.|[^"\\])*"'
|
||||||
# Tail end of ''' string.
|
# Tail end of ''' string.
|
||||||
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
|
Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
|
||||||
# Tail end of """ string.
|
# Tail end of """ string.
|
||||||
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
|
Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
|
||||||
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
|
Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')
|
||||||
|
|
||||||
# Because of leftmost-then-longest match semantics, be sure to put the
|
# Because of leftmost-then-longest match semantics, be sure to put the
|
||||||
# longest operators first (e.g., if = came before ==, == would get
|
# longest operators first (e.g., if = came before ==, == would get
|
||||||
# recognized as two instances of =).
|
# recognized as two instances of =).
|
||||||
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
|
Operator = group(r"\*\*=?", r">>=?", r"<<=?",
|
||||||
r"//=?", r"->",
|
r"//=?", r"->",
|
||||||
r"[+\-*/%&@`|^=<>]=?",
|
r"[+\-*/%&@`|^!=<>]=?",
|
||||||
r"~")
|
r"~")
|
||||||
|
|
||||||
Bracket = '[][(){}]'
|
Bracket = '[][(){}]'
|
||||||
|
|
||||||
special_args = [r'\r?\n', r'[:;.,@]']
|
special_args = [r'\r\n?', r'\n', r'[:;.,@]']
|
||||||
if version_info >= (3, 0):
|
if version_info >= (3, 0):
|
||||||
special_args.insert(0, r'\.\.\.')
|
special_args.insert(0, r'\.\.\.')
|
||||||
Special = group(*special_args)
|
Special = group(*special_args)
|
||||||
@@ -170,11 +194,16 @@ def _create_token_collection(version_info):
|
|||||||
Funny = group(Operator, Bracket, Special)
|
Funny = group(Operator, Bracket, Special)
|
||||||
|
|
||||||
# First (or only) line of ' or " string.
|
# First (or only) line of ' or " string.
|
||||||
ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
|
ContStr = group(StringPrefix + r"'[^\r\n'\\]*(?:\\.[^\r\n'\\]*)*" +
|
||||||
group("'", r'\\\r?\n'),
|
group("'", r'\\(?:\r\n?|\n)'),
|
||||||
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
|
StringPrefix + r'"[^\r\n"\\]*(?:\\.[^\r\n"\\]*)*' +
|
||||||
group('"', r'\\\r?\n'))
|
group('"', r'\\(?:\r\n?|\n)'))
|
||||||
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
|
pseudo_extra_pool = [Comment, Triple]
|
||||||
|
all_quotes = '"', "'", '"""', "'''"
|
||||||
|
if fstring_prefixes:
|
||||||
|
pseudo_extra_pool.append(FStringStart + group(*all_quotes))
|
||||||
|
|
||||||
|
PseudoExtras = group(r'\\(?:\r\n?|\n)|\Z', *pseudo_extra_pool)
|
||||||
PseudoToken = group(Whitespace, capture=True) + \
|
PseudoToken = group(Whitespace, capture=True) + \
|
||||||
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
|
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
|
||||||
|
|
||||||
@@ -192,18 +221,24 @@ def _create_token_collection(version_info):
|
|||||||
# including the opening quotes.
|
# including the opening quotes.
|
||||||
single_quoted = set()
|
single_quoted = set()
|
||||||
triple_quoted = set()
|
triple_quoted = set()
|
||||||
|
fstring_pattern_map = {}
|
||||||
for t in possible_prefixes:
|
for t in possible_prefixes:
|
||||||
for p in (t + '"', t + "'"):
|
for quote in '"', "'":
|
||||||
single_quoted.add(p)
|
single_quoted.add(t + quote)
|
||||||
for p in (t + '"""', t + "'''"):
|
|
||||||
triple_quoted.add(p)
|
for quote in '"""', "'''":
|
||||||
|
triple_quoted.add(t + quote)
|
||||||
|
|
||||||
|
for t in fstring_prefixes:
|
||||||
|
for quote in all_quotes:
|
||||||
|
fstring_pattern_map[t + quote] = quote
|
||||||
|
|
||||||
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
|
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
|
||||||
'finally', 'while', 'with', 'return')
|
'finally', 'while', 'with', 'return')
|
||||||
pseudo_token_compiled = _compile(PseudoToken)
|
pseudo_token_compiled = _compile(PseudoToken)
|
||||||
return TokenCollection(
|
return TokenCollection(
|
||||||
pseudo_token_compiled, single_quoted, triple_quoted, endpats,
|
pseudo_token_compiled, single_quoted, triple_quoted, endpats,
|
||||||
ALWAYS_BREAK_TOKENS
|
whitespace, fstring_pattern_map, ALWAYS_BREAK_TOKENS
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -218,12 +253,81 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
|
|||||||
|
|
||||||
|
|
||||||
class PythonToken(Token):
|
class PythonToken(Token):
|
||||||
def _get_type_name(self, exact=True):
|
|
||||||
return tok_name[self.type]
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
|
return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
|
||||||
self._replace(type=self._get_type_name()))
|
self._replace(type=self.type.name))
|
||||||
|
|
||||||
|
|
||||||
|
class FStringNode(object):
|
||||||
|
def __init__(self, quote):
|
||||||
|
self.quote = quote
|
||||||
|
self.parentheses_count = 0
|
||||||
|
self.previous_lines = ''
|
||||||
|
self.last_string_start_pos = None
|
||||||
|
# In the syntax there can be multiple format_spec's nested:
|
||||||
|
# {x:{y:3}}
|
||||||
|
self.format_spec_count = 0
|
||||||
|
|
||||||
|
def open_parentheses(self, character):
|
||||||
|
self.parentheses_count += 1
|
||||||
|
|
||||||
|
def close_parentheses(self, character):
|
||||||
|
self.parentheses_count -= 1
|
||||||
|
if self.parentheses_count == 0:
|
||||||
|
# No parentheses means that the format spec is also finished.
|
||||||
|
self.format_spec_count = 0
|
||||||
|
|
||||||
|
def allow_multiline(self):
|
||||||
|
return len(self.quote) == 3
|
||||||
|
|
||||||
|
def is_in_expr(self):
|
||||||
|
return (self.parentheses_count - self.format_spec_count) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def _close_fstring_if_necessary(fstring_stack, string, start_pos, additional_prefix):
|
||||||
|
for fstring_stack_index, node in enumerate(fstring_stack):
|
||||||
|
if string.startswith(node.quote):
|
||||||
|
token = PythonToken(
|
||||||
|
FSTRING_END,
|
||||||
|
node.quote,
|
||||||
|
start_pos,
|
||||||
|
prefix=additional_prefix,
|
||||||
|
)
|
||||||
|
additional_prefix = ''
|
||||||
|
assert not node.previous_lines
|
||||||
|
del fstring_stack[fstring_stack_index:]
|
||||||
|
return token, '', len(node.quote)
|
||||||
|
return None, additional_prefix, 0
|
||||||
|
|
||||||
|
|
||||||
|
def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
|
||||||
|
tos = fstring_stack[-1]
|
||||||
|
allow_multiline = tos.allow_multiline()
|
||||||
|
if allow_multiline:
|
||||||
|
match = fstring_string_multi_line.match(line, pos)
|
||||||
|
else:
|
||||||
|
match = fstring_string_single_line.match(line, pos)
|
||||||
|
if match is None:
|
||||||
|
return tos.previous_lines, pos
|
||||||
|
|
||||||
|
if not tos.previous_lines:
|
||||||
|
tos.last_string_start_pos = (lnum, pos)
|
||||||
|
|
||||||
|
string = match.group(0)
|
||||||
|
for fstring_stack_node in fstring_stack:
|
||||||
|
end_match = endpats[fstring_stack_node.quote].match(string)
|
||||||
|
if end_match is not None:
|
||||||
|
string = end_match.group(0)[:-len(fstring_stack_node.quote)]
|
||||||
|
|
||||||
|
new_pos = pos
|
||||||
|
new_pos += len(string)
|
||||||
|
if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
|
||||||
|
tos.previous_lines += string
|
||||||
|
string = ''
|
||||||
|
else:
|
||||||
|
string = tos.previous_lines + string
|
||||||
|
|
||||||
|
return string, new_pos
|
||||||
|
|
||||||
|
|
||||||
def tokenize(code, version_info, start_pos=(1, 0)):
|
def tokenize(code, version_info, start_pos=(1, 0)):
|
||||||
@@ -232,6 +336,18 @@ def tokenize(code, version_info, start_pos=(1, 0)):
|
|||||||
return tokenize_lines(lines, version_info, start_pos=start_pos)
|
return tokenize_lines(lines, version_info, start_pos=start_pos)
|
||||||
|
|
||||||
|
|
||||||
|
def _print_tokens(func):
|
||||||
|
"""
|
||||||
|
A small helper function to help debug the tokenize_lines function.
|
||||||
|
"""
|
||||||
|
def wrapper(*args, **kwargs):
|
||||||
|
for token in func(*args, **kwargs):
|
||||||
|
yield token
|
||||||
|
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
# @_print_tokens
|
||||||
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||||
"""
|
"""
|
||||||
A heavily modified Python standard library tokenizer.
|
A heavily modified Python standard library tokenizer.
|
||||||
@@ -240,7 +356,16 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
|||||||
token. This idea comes from lib2to3. The prefix contains all information
|
token. This idea comes from lib2to3. The prefix contains all information
|
||||||
that is irrelevant for the parser like newlines in parentheses or comments.
|
that is irrelevant for the parser like newlines in parentheses or comments.
|
||||||
"""
|
"""
|
||||||
pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
|
def dedent_if_necessary(start):
|
||||||
|
while start < indents[-1]:
|
||||||
|
if start > indents[-2]:
|
||||||
|
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
|
||||||
|
break
|
||||||
|
yield PythonToken(DEDENT, '', spos, '')
|
||||||
|
indents.pop()
|
||||||
|
|
||||||
|
pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
|
||||||
|
fstring_pattern_map, always_break_tokens, = \
|
||||||
_get_token_collection(version_info)
|
_get_token_collection(version_info)
|
||||||
paren_level = 0 # count parentheses
|
paren_level = 0 # count parentheses
|
||||||
indents = [0]
|
indents = [0]
|
||||||
@@ -257,6 +382,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
|||||||
additional_prefix = ''
|
additional_prefix = ''
|
||||||
first = True
|
first = True
|
||||||
lnum = start_pos[0] - 1
|
lnum = start_pos[0] - 1
|
||||||
|
fstring_stack = []
|
||||||
for line in lines: # loop over lines in stream
|
for line in lines: # loop over lines in stream
|
||||||
lnum += 1
|
lnum += 1
|
||||||
pos = 0
|
pos = 0
|
||||||
@@ -278,7 +404,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
|||||||
endmatch = endprog.match(line)
|
endmatch = endprog.match(line)
|
||||||
if endmatch:
|
if endmatch:
|
||||||
pos = endmatch.end(0)
|
pos = endmatch.end(0)
|
||||||
yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix)
|
yield PythonToken(
|
||||||
|
STRING, contstr + line[:pos],
|
||||||
|
contstr_start, prefix)
|
||||||
contstr = ''
|
contstr = ''
|
||||||
contline = None
|
contline = None
|
||||||
else:
|
else:
|
||||||
@@ -287,14 +415,50 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
while pos < max:
|
while pos < max:
|
||||||
|
if fstring_stack:
|
||||||
|
tos = fstring_stack[-1]
|
||||||
|
if not tos.is_in_expr():
|
||||||
|
string, pos = _find_fstring_string(endpats, fstring_stack, line, lnum, pos)
|
||||||
|
if pos == max:
|
||||||
|
break
|
||||||
|
if string:
|
||||||
|
yield PythonToken(
|
||||||
|
FSTRING_STRING, string,
|
||||||
|
tos.last_string_start_pos,
|
||||||
|
# Never has a prefix because it can start anywhere and
|
||||||
|
# include whitespace.
|
||||||
|
prefix=''
|
||||||
|
)
|
||||||
|
tos.previous_lines = ''
|
||||||
|
continue
|
||||||
|
|
||||||
|
rest = line[pos:]
|
||||||
|
fstring_end_token, additional_prefix, quote_length = _close_fstring_if_necessary(
|
||||||
|
fstring_stack,
|
||||||
|
rest,
|
||||||
|
(lnum, pos),
|
||||||
|
additional_prefix,
|
||||||
|
)
|
||||||
|
pos += quote_length
|
||||||
|
if fstring_end_token is not None:
|
||||||
|
yield fstring_end_token
|
||||||
|
continue
|
||||||
|
|
||||||
pseudomatch = pseudo_token.match(line, pos)
|
pseudomatch = pseudo_token.match(line, pos)
|
||||||
if not pseudomatch: # scan for tokens
|
if not pseudomatch: # scan for tokens
|
||||||
txt = line[pos:]
|
match = whitespace.match(line, pos)
|
||||||
if txt.endswith('\n'):
|
if pos == 0:
|
||||||
new_line = True
|
for t in dedent_if_necessary(match.end()):
|
||||||
yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
|
yield t
|
||||||
|
pos = match.end()
|
||||||
|
new_line = False
|
||||||
|
yield PythonToken(
|
||||||
|
ERRORTOKEN, line[pos], (lnum, pos),
|
||||||
|
additional_prefix + match.group(0)
|
||||||
|
)
|
||||||
additional_prefix = ''
|
additional_prefix = ''
|
||||||
break
|
pos += 1
|
||||||
|
continue
|
||||||
|
|
||||||
prefix = additional_prefix + pseudomatch.group(1)
|
prefix = additional_prefix + pseudomatch.group(1)
|
||||||
additional_prefix = ''
|
additional_prefix = ''
|
||||||
@@ -309,28 +473,31 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
|||||||
break
|
break
|
||||||
initial = token[0]
|
initial = token[0]
|
||||||
|
|
||||||
if new_line and initial not in '\r\n#':
|
if new_line and initial not in '\r\n\\#':
|
||||||
new_line = False
|
new_line = False
|
||||||
if paren_level == 0:
|
if paren_level == 0 and not fstring_stack:
|
||||||
i = 0
|
i = 0
|
||||||
|
indent_start = start
|
||||||
while line[i] == '\f':
|
while line[i] == '\f':
|
||||||
i += 1
|
i += 1
|
||||||
start -= 1
|
# TODO don't we need to change spos as well?
|
||||||
if start > indents[-1]:
|
indent_start -= 1
|
||||||
|
if indent_start > indents[-1]:
|
||||||
yield PythonToken(INDENT, '', spos, '')
|
yield PythonToken(INDENT, '', spos, '')
|
||||||
indents.append(start)
|
indents.append(indent_start)
|
||||||
while start < indents[-1]:
|
for t in dedent_if_necessary(indent_start):
|
||||||
if start > indents[-2]:
|
yield t
|
||||||
yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '')
|
|
||||||
break
|
|
||||||
yield PythonToken(DEDENT, '', spos, '')
|
|
||||||
indents.pop()
|
|
||||||
|
|
||||||
if (initial in numchars or # ordinary number
|
if (initial in numchars or # ordinary number
|
||||||
(initial == '.' and token != '.' and token != '...')):
|
(initial == '.' and token != '.' and token != '...')):
|
||||||
yield PythonToken(NUMBER, token, spos, prefix)
|
yield PythonToken(NUMBER, token, spos, prefix)
|
||||||
elif initial in '\r\n':
|
elif initial in '\r\n':
|
||||||
if not new_line and paren_level == 0:
|
if any(not f.allow_multiline() for f in fstring_stack):
|
||||||
|
# Would use fstring_stack.clear, but that's not available
|
||||||
|
# in Python 2.
|
||||||
|
fstring_stack[:] = []
|
||||||
|
|
||||||
|
if not new_line and paren_level == 0 and not fstring_stack:
|
||||||
yield PythonToken(NEWLINE, token, spos, prefix)
|
yield PythonToken(NEWLINE, token, spos, prefix)
|
||||||
else:
|
else:
|
||||||
additional_prefix = prefix + token
|
additional_prefix = prefix + token
|
||||||
@@ -350,10 +517,23 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
|||||||
contstr = line[start:]
|
contstr = line[start:]
|
||||||
contline = line
|
contline = line
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Check up to the first 3 chars of the token to see if
|
||||||
|
# they're in the single_quoted set. If so, they start
|
||||||
|
# a string.
|
||||||
|
# We're using the first 3, because we're looking for
|
||||||
|
# "rb'" (for example) at the start of the token. If
|
||||||
|
# we switch to longer prefixes, this needs to be
|
||||||
|
# adjusted.
|
||||||
|
# Note that initial == token[:1].
|
||||||
|
# Also note that single quote checking must come after
|
||||||
|
# triple quote checking (above).
|
||||||
elif initial in single_quoted or \
|
elif initial in single_quoted or \
|
||||||
token[:2] in single_quoted or \
|
token[:2] in single_quoted or \
|
||||||
token[:3] in single_quoted:
|
token[:3] in single_quoted:
|
||||||
if token[-1] == '\n': # continued string
|
if token[-1] in '\r\n': # continued string
|
||||||
|
# This means that a single quoted string ends with a
|
||||||
|
# backslash and is continued.
|
||||||
contstr_start = lnum, start
|
contstr_start = lnum, start
|
||||||
endprog = (endpats.get(initial) or endpats.get(token[1])
|
endprog = (endpats.get(initial) or endpats.get(token[1])
|
||||||
or endpats.get(token[2]))
|
or endpats.get(token[2]))
|
||||||
@@ -362,9 +542,15 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
|||||||
break
|
break
|
||||||
else: # ordinary string
|
else: # ordinary string
|
||||||
yield PythonToken(STRING, token, spos, prefix)
|
yield PythonToken(STRING, token, spos, prefix)
|
||||||
|
elif token in fstring_pattern_map: # The start of an fstring.
|
||||||
|
fstring_stack.append(FStringNode(fstring_pattern_map[token]))
|
||||||
|
yield PythonToken(FSTRING_START, token, spos, prefix)
|
||||||
elif is_identifier(initial): # ordinary name
|
elif is_identifier(initial): # ordinary name
|
||||||
if token in always_break_tokens:
|
if token in always_break_tokens:
|
||||||
|
fstring_stack[:] = []
|
||||||
paren_level = 0
|
paren_level = 0
|
||||||
|
# We only want to dedent if the token is on a new line.
|
||||||
|
if re.match(r'[ \f\t]*$', line[:start]):
|
||||||
while True:
|
while True:
|
||||||
indent = indents.pop()
|
indent = indents.pop()
|
||||||
if indent > start:
|
if indent > start:
|
||||||
@@ -373,26 +559,30 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
|||||||
indents.append(indent)
|
indents.append(indent)
|
||||||
break
|
break
|
||||||
yield PythonToken(NAME, token, spos, prefix)
|
yield PythonToken(NAME, token, spos, prefix)
|
||||||
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt
|
elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'): # continued stmt
|
||||||
additional_prefix += prefix + line[start:]
|
additional_prefix += prefix + line[start:]
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
if token in '([{':
|
if token in '([{':
|
||||||
|
if fstring_stack:
|
||||||
|
fstring_stack[-1].open_parentheses(token)
|
||||||
|
else:
|
||||||
paren_level += 1
|
paren_level += 1
|
||||||
elif token in ')]}':
|
elif token in ')]}':
|
||||||
|
if fstring_stack:
|
||||||
|
fstring_stack[-1].close_parentheses(token)
|
||||||
|
else:
|
||||||
|
if paren_level:
|
||||||
paren_level -= 1
|
paren_level -= 1
|
||||||
|
elif token == ':' and fstring_stack \
|
||||||
|
and fstring_stack[-1].parentheses_count == 1:
|
||||||
|
fstring_stack[-1].format_spec_count += 1
|
||||||
|
|
||||||
try:
|
yield PythonToken(OP, token, spos, prefix)
|
||||||
# This check is needed in any case to check if it's a valid
|
|
||||||
# operator or just some random unicode character.
|
|
||||||
typ = opmap[token]
|
|
||||||
except KeyError:
|
|
||||||
typ = ERRORTOKEN
|
|
||||||
yield PythonToken(typ, token, spos, prefix)
|
|
||||||
|
|
||||||
if contstr:
|
if contstr:
|
||||||
yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
|
yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
|
||||||
if contstr.endswith('\n'):
|
if contstr.endswith('\n') or contstr.endswith('\r'):
|
||||||
new_line = True
|
new_line = True
|
||||||
|
|
||||||
end_pos = lnum, max
|
end_pos = lnum, max
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ from parso._compatibility import utf8_repr, unicode
|
|||||||
from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
|
from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
|
||||||
search_ancestor
|
search_ancestor
|
||||||
from parso.python.prefix import split_prefix
|
from parso.python.prefix import split_prefix
|
||||||
|
from parso.utils import split_lines
|
||||||
|
|
||||||
_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt',
|
_FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt',
|
||||||
'with_stmt', 'async_stmt', 'suite'])
|
'with_stmt', 'async_stmt', 'suite'])
|
||||||
@@ -60,7 +61,6 @@ _GET_DEFINITION_TYPES = set([
|
|||||||
_IMPORTS = set(['import_name', 'import_from'])
|
_IMPORTS = set(['import_name', 'import_from'])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DocstringMixin(object):
|
class DocstringMixin(object):
|
||||||
__slots__ = ()
|
__slots__ = ()
|
||||||
|
|
||||||
@@ -125,15 +125,16 @@ class PythonLeaf(PythonMixin, Leaf):
|
|||||||
# indent error leafs somehow? No idea how, though.
|
# indent error leafs somehow? No idea how, though.
|
||||||
previous_leaf = self.get_previous_leaf()
|
previous_leaf = self.get_previous_leaf()
|
||||||
if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
|
if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
|
||||||
and previous_leaf.original_type in ('indent', 'error_dedent'):
|
and previous_leaf.token_type in ('INDENT', 'DEDENT', 'ERROR_DEDENT'):
|
||||||
previous_leaf = previous_leaf.get_previous_leaf()
|
previous_leaf = previous_leaf.get_previous_leaf()
|
||||||
|
|
||||||
if previous_leaf is None:
|
if previous_leaf is None: # It's the first leaf.
|
||||||
return self.line - self.prefix.count('\n'), 0 # It's the first leaf.
|
lines = split_lines(self.prefix)
|
||||||
|
# + 1 is needed because split_lines always returns at least [''].
|
||||||
|
return self.line - len(lines) + 1, 0 # It's the first leaf.
|
||||||
return previous_leaf.end_pos
|
return previous_leaf.end_pos
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class _LeafWithoutNewlines(PythonLeaf):
|
class _LeafWithoutNewlines(PythonLeaf):
|
||||||
"""
|
"""
|
||||||
Simply here to optimize performance.
|
Simply here to optimize performance.
|
||||||
@@ -166,6 +167,12 @@ class EndMarker(_LeafWithoutNewlines):
|
|||||||
__slots__ = ()
|
__slots__ = ()
|
||||||
type = 'endmarker'
|
type = 'endmarker'
|
||||||
|
|
||||||
|
@utf8_repr
|
||||||
|
def __repr__(self):
|
||||||
|
return "<%s: prefix=%s end_pos=%s>" % (
|
||||||
|
type(self).__name__, repr(self.prefix), self.end_pos
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Newline(PythonLeaf):
|
class Newline(PythonLeaf):
|
||||||
"""Contains NEWLINE and ENDMARKER tokens."""
|
"""Contains NEWLINE and ENDMARKER tokens."""
|
||||||
@@ -235,7 +242,6 @@ class Name(_LeafWithoutNewlines):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Literal(PythonLeaf):
|
class Literal(PythonLeaf):
|
||||||
__slots__ = ()
|
__slots__ = ()
|
||||||
|
|
||||||
@@ -251,7 +257,7 @@ class String(Literal):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def string_prefix(self):
|
def string_prefix(self):
|
||||||
return re.match('\w*(?=[\'"])', self.value).group(0)
|
return re.match(r'\w*(?=[\'"])', self.value).group(0)
|
||||||
|
|
||||||
def _get_payload(self):
|
def _get_payload(self):
|
||||||
match = re.search(
|
match = re.search(
|
||||||
@@ -262,6 +268,33 @@ class String(Literal):
|
|||||||
return match.group(2)[:-len(match.group(1))]
|
return match.group(2)[:-len(match.group(1))]
|
||||||
|
|
||||||
|
|
||||||
|
class FStringString(PythonLeaf):
|
||||||
|
"""
|
||||||
|
f-strings contain f-string expressions and normal python strings. These are
|
||||||
|
the string parts of f-strings.
|
||||||
|
"""
|
||||||
|
type = 'fstring_string'
|
||||||
|
__slots__ = ()
|
||||||
|
|
||||||
|
|
||||||
|
class FStringStart(PythonLeaf):
|
||||||
|
"""
|
||||||
|
f-strings contain f-string expressions and normal python strings. These are
|
||||||
|
the tokens that start an f-string.
|
||||||
|
"""
|
||||||
|
type = 'fstring_start'
|
||||||
|
__slots__ = ()
|
||||||
|
|
||||||
|
|
||||||
|
class FStringEnd(PythonLeaf):
|
||||||
|
"""
|
||||||
|
f-strings contain f-string expressions and normal python strings. These are
|
||||||
|
the tokens that end an f-string.
|
||||||
|
"""
|
||||||
|
type = 'fstring_end'
|
||||||
|
__slots__ = ()
|
||||||
|
|
||||||
|
|
||||||
class _StringComparisonMixin(object):
|
class _StringComparisonMixin(object):
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
"""
|
"""
|
||||||
@@ -509,7 +542,9 @@ def _create_params(parent, argslist_list):
|
|||||||
if child is None or child == ',':
|
if child is None or child == ',':
|
||||||
param_children = children[start:end]
|
param_children = children[start:end]
|
||||||
if param_children: # Could as well be comma and then end.
|
if param_children: # Could as well be comma and then end.
|
||||||
if param_children[0] == '*' and param_children[1] == ',' \
|
if param_children[0] == '*' \
|
||||||
|
and (len(param_children) == 1
|
||||||
|
or param_children[1] == ',') \
|
||||||
or check_python2_nested_param(param_children[0]):
|
or check_python2_nested_param(param_children[0]):
|
||||||
for p in param_children:
|
for p in param_children:
|
||||||
p.parent = parent
|
p.parent = parent
|
||||||
@@ -626,6 +661,7 @@ class Function(ClassOrFunc):
|
|||||||
except IndexError:
|
except IndexError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
class Lambda(Function):
|
class Lambda(Function):
|
||||||
"""
|
"""
|
||||||
Lambdas are basically trimmed functions, so give it the same interface.
|
Lambdas are basically trimmed functions, so give it the same interface.
|
||||||
@@ -933,7 +969,7 @@ class ImportName(Import):
|
|||||||
class KeywordStatement(PythonBaseNode):
|
class KeywordStatement(PythonBaseNode):
|
||||||
"""
|
"""
|
||||||
For the following statements: `assert`, `del`, `global`, `nonlocal`,
|
For the following statements: `assert`, `del`, `global`, `nonlocal`,
|
||||||
`raise`, `return`, `yield`, `return`, `yield`.
|
`raise`, `return`, `yield`.
|
||||||
|
|
||||||
`pass`, `continue` and `break` are not in there, because they are just
|
`pass`, `continue` and `break` are not in there, because they are just
|
||||||
simple keywords and the parser reduces it to a keyword.
|
simple keywords and the parser reduces it to a keyword.
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
from abc import abstractmethod, abstractproperty
|
from abc import abstractmethod, abstractproperty
|
||||||
|
|
||||||
from parso._compatibility import utf8_repr, encoding, py_version
|
from parso._compatibility import utf8_repr, encoding, py_version
|
||||||
|
from parso.utils import split_lines
|
||||||
|
|
||||||
|
|
||||||
def search_ancestor(node, *node_types):
|
def search_ancestor(node, *node_types):
|
||||||
@@ -55,7 +57,6 @@ class NodeOrLeaf(object):
|
|||||||
Returns the node immediately preceding this node in this parent's
|
Returns the node immediately preceding this node in this parent's
|
||||||
children list. If this node does not have a previous sibling, it is
|
children list. If this node does not have a previous sibling, it is
|
||||||
None.
|
None.
|
||||||
None.
|
|
||||||
"""
|
"""
|
||||||
# Can't use index(); we need to test by identity
|
# Can't use index(); we need to test by identity
|
||||||
for i, child in enumerate(self.parent.children):
|
for i, child in enumerate(self.parent.children):
|
||||||
@@ -194,7 +195,9 @@ class Leaf(NodeOrLeaf):
|
|||||||
def get_start_pos_of_prefix(self):
|
def get_start_pos_of_prefix(self):
|
||||||
previous_leaf = self.get_previous_leaf()
|
previous_leaf = self.get_previous_leaf()
|
||||||
if previous_leaf is None:
|
if previous_leaf is None:
|
||||||
return self.line - self.prefix.count('\n'), 0 # It's the first leaf.
|
lines = split_lines(self.prefix)
|
||||||
|
# + 1 is needed because split_lines always returns at least [''].
|
||||||
|
return self.line - len(lines) + 1, 0 # It's the first leaf.
|
||||||
return previous_leaf.end_pos
|
return previous_leaf.end_pos
|
||||||
|
|
||||||
def get_first_leaf(self):
|
def get_first_leaf(self):
|
||||||
@@ -211,7 +214,7 @@ class Leaf(NodeOrLeaf):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def end_pos(self):
|
def end_pos(self):
|
||||||
lines = self.value.split('\n')
|
lines = split_lines(self.value)
|
||||||
end_pos_line = self.line + len(lines) - 1
|
end_pos_line = self.line + len(lines) - 1
|
||||||
# Check for multiline token
|
# Check for multiline token
|
||||||
if self.line == end_pos_line:
|
if self.line == end_pos_line:
|
||||||
@@ -230,6 +233,7 @@ class Leaf(NodeOrLeaf):
|
|||||||
|
|
||||||
class TypedLeaf(Leaf):
|
class TypedLeaf(Leaf):
|
||||||
__slots__ = ('type',)
|
__slots__ = ('type',)
|
||||||
|
|
||||||
def __init__(self, type, value, start_pos, prefix=''):
|
def __init__(self, type, value, start_pos, prefix=''):
|
||||||
super(TypedLeaf, self).__init__(value, start_pos, prefix)
|
super(TypedLeaf, self).__init__(value, start_pos, prefix)
|
||||||
self.type = type
|
self.type = type
|
||||||
@@ -244,8 +248,6 @@ class BaseNode(NodeOrLeaf):
|
|||||||
type = None
|
type = None
|
||||||
|
|
||||||
def __init__(self, children):
|
def __init__(self, children):
|
||||||
for c in children:
|
|
||||||
c.parent = self
|
|
||||||
self.children = children
|
self.children = children
|
||||||
"""
|
"""
|
||||||
A list of :class:`NodeOrLeaf` child nodes.
|
A list of :class:`NodeOrLeaf` child nodes.
|
||||||
@@ -318,7 +320,7 @@ class BaseNode(NodeOrLeaf):
|
|||||||
|
|
||||||
@utf8_repr
|
@utf8_repr
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
code = self.get_code().replace('\n', ' ').strip()
|
code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip()
|
||||||
if not py_version >= 30:
|
if not py_version >= 30:
|
||||||
code = code.encode(encoding, 'replace')
|
code = code.encode(encoding, 'replace')
|
||||||
return "<%s: %s@%s,%s>" % \
|
return "<%s: %s@%s,%s>" % \
|
||||||
@@ -339,7 +341,7 @@ class Node(BaseNode):
|
|||||||
|
|
||||||
class ErrorNode(BaseNode):
|
class ErrorNode(BaseNode):
|
||||||
"""
|
"""
|
||||||
A node that containes valid nodes/leaves that we're follow by a token that
|
A node that contains valid nodes/leaves that were followed by a token that
|
||||||
was invalid. This basically means that the leaf after this node is where
|
was invalid. This basically means that the leaf after this node is where
|
||||||
Python would mark a syntax error.
|
Python would mark a syntax error.
|
||||||
"""
|
"""
|
||||||
@@ -352,13 +354,13 @@ class ErrorLeaf(Leaf):
|
|||||||
A leaf that is either completely invalid in a language (like `$` in Python)
|
A leaf that is either completely invalid in a language (like `$` in Python)
|
||||||
or is invalid at that position. Like the star in `1 +* 1`.
|
or is invalid at that position. Like the star in `1 +* 1`.
|
||||||
"""
|
"""
|
||||||
__slots__ = ('original_type',)
|
__slots__ = ('token_type',)
|
||||||
type = 'error_leaf'
|
type = 'error_leaf'
|
||||||
|
|
||||||
def __init__(self, original_type, value, start_pos, prefix=''):
|
def __init__(self, token_type, value, start_pos, prefix=''):
|
||||||
super(ErrorLeaf, self).__init__(value, start_pos, prefix)
|
super(ErrorLeaf, self).__init__(value, start_pos, prefix)
|
||||||
self.original_type = original_type
|
self.token_type = token_type
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<%s: %s:%s, %s>" % \
|
return "<%s: %s:%s, %s>" % \
|
||||||
(type(self).__name__, self.original_type, repr(self.value), self.start_pos)
|
(type(self).__name__, self.token_type, repr(self.value), self.start_pos)
|
||||||
|
|||||||
@@ -5,6 +5,20 @@ from ast import literal_eval
|
|||||||
|
|
||||||
from parso._compatibility import unicode, total_ordering
|
from parso._compatibility import unicode, total_ordering
|
||||||
|
|
||||||
|
# The following is a list of characters that are line breaks in str.splitlines, but
|
||||||
|
# not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,
|
||||||
|
# 0xA) are allowed to split lines.
|
||||||
|
_NON_LINE_BREAKS = (
|
||||||
|
u'\v', # Vertical Tabulation 0xB
|
||||||
|
u'\f', # Form Feed 0xC
|
||||||
|
u'\x1C', # File Separator
|
||||||
|
u'\x1D', # Group Separator
|
||||||
|
u'\x1E', # Record Separator
|
||||||
|
u'\x85', # Next Line (NEL - Equivalent to CR+LF.
|
||||||
|
# Used to mark end-of-line on some IBM mainframes.)
|
||||||
|
u'\u2028', # Line Separator
|
||||||
|
u'\u2029', # Paragraph Separator
|
||||||
|
)
|
||||||
|
|
||||||
Version = namedtuple('Version', 'major, minor, micro')
|
Version = namedtuple('Version', 'major, minor, micro')
|
||||||
|
|
||||||
@@ -26,7 +40,12 @@ def split_lines(string, keepends=False):
|
|||||||
# We have to merge lines that were broken by form feed characters.
|
# We have to merge lines that were broken by form feed characters.
|
||||||
merge = []
|
merge = []
|
||||||
for i, line in enumerate(lst):
|
for i, line in enumerate(lst):
|
||||||
if line.endswith('\f'):
|
try:
|
||||||
|
last_chr = line[-1]
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
if last_chr in _NON_LINE_BREAKS:
|
||||||
merge.append(i)
|
merge.append(i)
|
||||||
|
|
||||||
for index in reversed(merge):
|
for index in reversed(merge):
|
||||||
@@ -41,11 +60,11 @@ def split_lines(string, keepends=False):
|
|||||||
# The stdlib's implementation of the end is inconsistent when calling
|
# The stdlib's implementation of the end is inconsistent when calling
|
||||||
# it with/without keepends. One time there's an empty string in the
|
# it with/without keepends. One time there's an empty string in the
|
||||||
# end, one time there's none.
|
# end, one time there's none.
|
||||||
if string.endswith('\n') or string == '':
|
if string.endswith('\n') or string.endswith('\r') or string == '':
|
||||||
lst.append('')
|
lst.append('')
|
||||||
return lst
|
return lst
|
||||||
else:
|
else:
|
||||||
return re.split('\n|\r\n', string)
|
return re.split(r'\n|\r\n|\r', string)
|
||||||
|
|
||||||
|
|
||||||
def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
|
def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
|
||||||
|
|||||||
8
setup.py
8
setup.py
@@ -40,8 +40,16 @@ setup(name='parso',
|
|||||||
'Programming Language :: Python :: 3.3',
|
'Programming Language :: Python :: 3.3',
|
||||||
'Programming Language :: Python :: 3.4',
|
'Programming Language :: Python :: 3.4',
|
||||||
'Programming Language :: Python :: 3.5',
|
'Programming Language :: Python :: 3.5',
|
||||||
|
'Programming Language :: Python :: 3.6',
|
||||||
|
'Programming Language :: Python :: 3.7',
|
||||||
'Topic :: Software Development :: Libraries :: Python Modules',
|
'Topic :: Software Development :: Libraries :: Python Modules',
|
||||||
'Topic :: Text Editors :: Integrated Development Environments (IDE)',
|
'Topic :: Text Editors :: Integrated Development Environments (IDE)',
|
||||||
'Topic :: Utilities',
|
'Topic :: Utilities',
|
||||||
],
|
],
|
||||||
|
extras_require={
|
||||||
|
'testing': [
|
||||||
|
'pytest>=3.0.7',
|
||||||
|
'docopt',
|
||||||
|
],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ FAILING_EXAMPLES = [
|
|||||||
|
|
||||||
# f-strings
|
# f-strings
|
||||||
'f"{}"',
|
'f"{}"',
|
||||||
'f"{\\}"',
|
r'f"{\}"',
|
||||||
'f"{\'\\\'}"',
|
'f"{\'\\\'}"',
|
||||||
'f"{#}"',
|
'f"{#}"',
|
||||||
"f'{1!b}'",
|
"f'{1!b}'",
|
||||||
@@ -285,6 +285,14 @@ if sys.version_info >= (3,):
|
|||||||
'b"ä"',
|
'b"ä"',
|
||||||
# combining strings and unicode is allowed in Python 2.
|
# combining strings and unicode is allowed in Python 2.
|
||||||
'"s" b""',
|
'"s" b""',
|
||||||
|
'"s" b"" ""',
|
||||||
|
'b"" "" b"" ""',
|
||||||
|
]
|
||||||
|
if sys.version_info >= (3, 6):
|
||||||
|
FAILING_EXAMPLES += [
|
||||||
|
# Same as above, but for f-strings.
|
||||||
|
'f"s" b""',
|
||||||
|
'b"s" f""',
|
||||||
]
|
]
|
||||||
if sys.version_info >= (2, 7):
|
if sys.version_info >= (2, 7):
|
||||||
# This is something that raises a different error in 2.6 than in the other
|
# This is something that raises a different error in 2.6 than in the other
|
||||||
|
|||||||
290
test/fuzz_diff_parser.py
Normal file
290
test/fuzz_diff_parser.py
Normal file
@@ -0,0 +1,290 @@
|
|||||||
|
"""
|
||||||
|
A script to find bugs in the diff parser.
|
||||||
|
|
||||||
|
This script is extremely useful if changes are made to the diff parser. By
|
||||||
|
running a few thousand iterations, we can assure that the diff parser is in
|
||||||
|
good shape.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
fuzz_diff_parser.py [--pdb|--ipdb] [-l] [-n=<nr>] [-x=<nr>] random [<path>]
|
||||||
|
fuzz_diff_parser.py [--pdb|--ipdb] [-l] redo [-o=<nr>] [-p]
|
||||||
|
fuzz_diff_parser.py -h | --help
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-h --help Show this screen
|
||||||
|
-n, --maxtries=<nr> Maximum of random tries [default: 1000]
|
||||||
|
-x, --changes=<nr> Amount of changes to be done to a file per try [default: 5]
|
||||||
|
-l, --logging Prints all the logs
|
||||||
|
-o, --only-last=<nr> Only runs the last n iterations; Defaults to running all
|
||||||
|
-p, --print-code Print all test diffs
|
||||||
|
--pdb Launch pdb when error is raised
|
||||||
|
--ipdb Launch ipdb when error is raised
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
import parso
|
||||||
|
from parso.utils import split_lines
|
||||||
|
from test.test_diff_parser import _check_error_leaves_nodes
|
||||||
|
|
||||||
|
_latest_grammar = parso.load_grammar(version='3.8')
|
||||||
|
_python_reserved_strings = tuple(
|
||||||
|
# Keywords are usually only interesting in combination with spaces after
|
||||||
|
# them. We don't put a space before keywords, to avoid indentation errors.
|
||||||
|
s + (' ' if s.isalpha() else '')
|
||||||
|
for s in _latest_grammar._pgen_grammar.reserved_syntax_strings.keys()
|
||||||
|
)
|
||||||
|
_random_python_fragments = _python_reserved_strings + (
|
||||||
|
' ', '\t', '\n', '\r', '\f', 'f"', 'F"""', "fr'", "RF'''", '"', '"""', "'",
|
||||||
|
"'''", ';', ' some_random_word ', '\\', '#',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def find_python_files_in_tree(file_path):
|
||||||
|
if not os.path.isdir(file_path):
|
||||||
|
yield file_path
|
||||||
|
return
|
||||||
|
for root, dirnames, filenames in os.walk(file_path):
|
||||||
|
for name in filenames:
|
||||||
|
if name.endswith('.py'):
|
||||||
|
yield os.path.join(root, name)
|
||||||
|
|
||||||
|
|
||||||
|
def _print_copyable_lines(lines):
|
||||||
|
for line in lines:
|
||||||
|
line = repr(line)[1:-1]
|
||||||
|
if line.endswith(r'\n'):
|
||||||
|
line = line[:-2] + '\n'
|
||||||
|
print(line, end='')
|
||||||
|
|
||||||
|
|
||||||
|
def _get_first_error_start_pos_or_none(module):
|
||||||
|
error_leaf = _check_error_leaves_nodes(module)
|
||||||
|
return None if error_leaf is None else error_leaf.start_pos
|
||||||
|
|
||||||
|
|
||||||
|
class LineReplacement:
|
||||||
|
def __init__(self, line_nr, new_line):
|
||||||
|
self._line_nr = line_nr
|
||||||
|
self._new_line = new_line
|
||||||
|
|
||||||
|
def apply(self, code_lines):
|
||||||
|
# print(repr(self._new_line))
|
||||||
|
code_lines[self._line_nr] = self._new_line
|
||||||
|
|
||||||
|
|
||||||
|
class LineDeletion:
|
||||||
|
def __init__(self, line_nr):
|
||||||
|
self.line_nr = line_nr
|
||||||
|
|
||||||
|
def apply(self, code_lines):
|
||||||
|
del code_lines[self.line_nr]
|
||||||
|
|
||||||
|
|
||||||
|
class LineCopy:
|
||||||
|
def __init__(self, copy_line, insertion_line):
|
||||||
|
self._copy_line = copy_line
|
||||||
|
self._insertion_line = insertion_line
|
||||||
|
|
||||||
|
def apply(self, code_lines):
|
||||||
|
code_lines.insert(
|
||||||
|
self._insertion_line,
|
||||||
|
# Use some line from the file. This doesn't feel totally
|
||||||
|
# random, but for the diff parser it will feel like it.
|
||||||
|
code_lines[self._copy_line]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FileModification:
|
||||||
|
@classmethod
|
||||||
|
def generate(cls, code_lines, change_count):
|
||||||
|
return cls(
|
||||||
|
list(cls._generate_line_modifications(code_lines, change_count)),
|
||||||
|
# work with changed trees more than with normal ones.
|
||||||
|
check_original=random.random() > 0.8,
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _generate_line_modifications(lines, change_count):
|
||||||
|
def random_line(include_end=False):
|
||||||
|
return random.randint(0, len(lines) - (not include_end))
|
||||||
|
|
||||||
|
lines = list(lines)
|
||||||
|
for _ in range(change_count):
|
||||||
|
rand = random.randint(1, 4)
|
||||||
|
if rand == 1:
|
||||||
|
if len(lines) == 1:
|
||||||
|
# We cannot delete every line, that doesn't make sense to
|
||||||
|
# fuzz and it would be annoying to rewrite everything here.
|
||||||
|
continue
|
||||||
|
l = LineDeletion(random_line())
|
||||||
|
elif rand == 2:
|
||||||
|
# Copy / Insertion
|
||||||
|
# Make it possible to insert into the first and the last line
|
||||||
|
l = LineCopy(random_line(), random_line(include_end=True))
|
||||||
|
elif rand in (3, 4):
|
||||||
|
# Modify a line in some weird random ways.
|
||||||
|
line_nr = random_line()
|
||||||
|
line = lines[line_nr]
|
||||||
|
column = random.randint(0, len(line))
|
||||||
|
random_string = ''
|
||||||
|
for _ in range(random.randint(1, 3)):
|
||||||
|
if random.random() > 0.8:
|
||||||
|
# The lower characters cause way more issues.
|
||||||
|
unicode_range = 0x1f if random.randint(0, 1) else 0x3000
|
||||||
|
random_string += chr(random.randint(0, unicode_range))
|
||||||
|
else:
|
||||||
|
# These insertions let us understand how random
|
||||||
|
# keyword/operator insertions work. Theoretically this
|
||||||
|
# could also be done with unicode insertions, but the
|
||||||
|
# fuzzer is just way more effective here.
|
||||||
|
random_string += random.choice(_random_python_fragments)
|
||||||
|
if random.random() > 0.5:
|
||||||
|
# In this case we insert at a very random place that
|
||||||
|
# probably breaks syntax.
|
||||||
|
line = line[:column] + random_string + line[column:]
|
||||||
|
else:
|
||||||
|
# Here we have better chances to not break syntax, because
|
||||||
|
# we really replace the line with something that has
|
||||||
|
# indentation.
|
||||||
|
line = ' ' * random.randint(0, 12) + random_string + '\n'
|
||||||
|
l = LineReplacement(line_nr, line)
|
||||||
|
l.apply(lines)
|
||||||
|
yield l
|
||||||
|
|
||||||
|
def __init__(self, modification_list, check_original):
|
||||||
|
self._modification_list = modification_list
|
||||||
|
self._check_original = check_original
|
||||||
|
|
||||||
|
def _apply(self, code_lines):
|
||||||
|
changed_lines = list(code_lines)
|
||||||
|
for modification in self._modification_list:
|
||||||
|
modification.apply(changed_lines)
|
||||||
|
return changed_lines
|
||||||
|
|
||||||
|
def run(self, grammar, code_lines, print_code):
    """
    Parse the modified code with the diff cache and optionally verify that
    going back to the original code reproduces the original error position.

    :param grammar: Object whose ``parse(code, diff_cache=True)`` is called.
    :param code_lines: The unmodified source as a sequence of lines.
    :param print_code: If true, print the modified lines (and the original
        lines as well when the original is being checked).
    """
    verify_original = self._check_original
    original_code = ''.join(code_lines)
    mutated_lines = self._apply(code_lines)
    mutated_code = ''.join(mutated_lines)

    if print_code:
        if verify_original:
            print('Original:')
            _print_copyable_lines(code_lines)

        print('\nModified:')
        _print_copyable_lines(mutated_lines)
        print()

    if not verify_original:
        # Nothing to compare against: just feed the mutation to the parser.
        grammar.parse(mutated_code, diff_cache=True)
        return

    # Original -> modified -> original. The first error position of the
    # original must be the same before and after the round trip.
    error_before = _get_first_error_start_pos_or_none(
        grammar.parse(original_code, diff_cache=True)
    )
    grammar.parse(mutated_code, diff_cache=True)
    error_after = _get_first_error_start_pos_or_none(
        grammar.parse(original_code, diff_cache=True)
    )
    assert error_before == error_after, (error_before, error_after)
|
||||||
|
|
||||||
|
|
||||||
|
class FileTests:
    """
    Generates random modifications for one file and runs them through the
    diff parser, remembering them so that a failing run can be replayed.
    """

    def __init__(self, file_path, test_count, change_count):
        """
        Read ``file_path`` and split it into lines (line endings kept).

        :param file_path: Path of the source file to fuzz.
        :param test_count: Number of modifications ``run`` generates.
        :param change_count: Passed to ``FileModification.generate`` for
            every generated modification.
        """
        self._path = file_path
        with open(file_path) as f:
            code = f.read()
        self._code_lines = split_lines(code, keepends=True)
        self._test_count = test_count
        self._change_count = change_count
        # Filled by run() so that redo() can replay exactly the same edits.
        self._file_modifications = []

    def _run(self, grammar, file_modifications, debugger, print_code=False):
        """
        Run every modification, printing one dot per finished modification.

        On any exception the file path is printed, the optional debugger
        module (imported by name) is started post-mortem, and the exception
        is re-raised.
        """
        try:
            for fm in file_modifications:
                fm.run(grammar, self._code_lines, print_code=print_code)
                print('.', end='')
                sys.stdout.flush()
            print()
        except Exception:
            print("Issue in file: %s" % self._path)
            if debugger:
                einfo = sys.exc_info()
                pdb = __import__(debugger)
                pdb.post_mortem(einfo[2])
            raise

    def redo(self, grammar, debugger, only_last, print_code):
        """
        Replay the recorded modifications.

        :param only_last: ``None`` replays everything, otherwise only the
            last ``only_last`` modifications are replayed.
        """
        mods = self._file_modifications
        if only_last is not None:
            # ``mods[-0:]`` is the whole list, so zero needs its own case to
            # really mean "replay nothing".
            mods = mods[-only_last:] if only_last else []
        self._run(grammar, mods, debugger, print_code=print_code)

    def run(self, grammar, debugger):
        """
        Generate ``test_count`` random modifications and run each of them.
        """
        def iterate():
            for _ in range(self._test_count):
                fm = FileModification.generate(self._code_lines, self._change_count)
                # Record before yielding, so a modification that crashes the
                # parser is already part of the replayable history.
                self._file_modifications.append(fm)
                yield fm

        self._run(grammar, iterate(), debugger)
|
||||||
|
|
||||||
|
|
||||||
|
def main(arguments):
    """
    Run the diff parser fuzzer for the command selected in ``arguments``.

    ``arguments`` is the mapping of command line options (built by docopt in
    the script entry point). Two commands are handled:

    - ``redo``: load the pickled ``FileTests`` object from
      ``fuzz-redo.pickle`` next to this script and replay it.
    - ``random``: fuzz randomly chosen Python files below ``<path>`` (or the
      current directory) until ``--maxtries`` tests have been run. If a run
      fails, the failing ``FileTests`` object is pickled for a later ``redo``
      and the exception is re-raised.

    :raises NotImplementedError: If neither command is selected.
    """
    if arguments['--pdb']:
        debugger = 'pdb'
    elif arguments['--ipdb']:
        debugger = 'ipdb'
    else:
        debugger = None
    redo_file = os.path.join(os.path.dirname(__file__), 'fuzz-redo.pickle')

    if arguments['--logging']:
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)

        ch = logging.StreamHandler(sys.stdout)
        ch.setLevel(logging.DEBUG)
        root.addHandler(ch)

    grammar = parso.load_grammar()
    parso.python.diff.DEBUG_DIFF_PARSER = True
    if arguments['redo']:
        # NOTE: unpickling executes arbitrary code. Only replay redo files
        # that were written by this script itself.
        with open(redo_file, 'rb') as f:
            file_tests_obj = pickle.load(f)
        raw_only_last = arguments['--only-last']
        only_last = int(raw_only_last) if raw_only_last else raw_only_last
        file_tests_obj.redo(
            grammar,
            debugger,
            only_last=only_last,
            print_code=arguments['--print-code']
        )
    elif arguments['random']:
        # A random file is used to do diff parser checks if no file is given.
        # This helps us to find errors in a lot of different files.
        file_paths = list(find_python_files_in_tree(arguments['<path>'] or '.'))
        max_tries = int(arguments['--maxtries'])
        tries = 0
        # Stays None until a FileTests object for the *current* file exists,
        # so the handler below never touches an unbound name and never
        # pickles the object of a previous, unrelated file.
        file_tests_obj = None
        try:
            while tries < max_tries:
                file_tests_obj = None
                path = random.choice(file_paths)
                print("Checking %s: %s tries" % (path, tries))
                now_tries = min(1000, max_tries - tries)
                file_tests_obj = FileTests(path, now_tries, int(arguments['--changes']))
                file_tests_obj.run(grammar, debugger)
                tries += now_tries
        except Exception:
            if file_tests_obj is not None:
                with open(redo_file, 'wb') as f:
                    pickle.dump(file_tests_obj, f)
            raise
    else:
        raise NotImplementedError('Command is not implemented')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # docopt is imported here, inside the guard, so it is only required when
    # the module is executed as a script.
    from docopt import docopt

    # The module docstring is handed to docopt as the usage description.
    arguments = docopt(__doc__)
    main(arguments)
|
||||||
@@ -1,14 +1,18 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from parso.utils import split_lines
|
from parso.utils import split_lines
|
||||||
from parso import cache
|
from parso import cache
|
||||||
from parso import load_grammar
|
from parso import load_grammar
|
||||||
from parso.python.diff import DiffParser
|
from parso.python.diff import DiffParser, _assert_valid_graph
|
||||||
from parso import parse
|
from parso import parse
|
||||||
|
|
||||||
|
ANY = object()
|
||||||
|
|
||||||
|
|
||||||
def test_simple():
|
def test_simple():
|
||||||
"""
|
"""
|
||||||
@@ -21,7 +25,7 @@ def test_simple():
|
|||||||
|
|
||||||
def _check_error_leaves_nodes(node):
|
def _check_error_leaves_nodes(node):
|
||||||
if node.type in ('error_leaf', 'error_node'):
|
if node.type in ('error_leaf', 'error_node'):
|
||||||
return True
|
return node
|
||||||
|
|
||||||
try:
|
try:
|
||||||
children = node.children
|
children = node.children
|
||||||
@@ -29,23 +33,10 @@ def _check_error_leaves_nodes(node):
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
for child in children:
|
for child in children:
|
||||||
if _check_error_leaves_nodes(child):
|
x_node = _check_error_leaves_nodes(child)
|
||||||
return True
|
if x_node is not None:
|
||||||
return False
|
return x_node
|
||||||
|
return None
|
||||||
|
|
||||||
def _assert_valid_graph(node):
|
|
||||||
"""
|
|
||||||
Checks if the parent/children relationship is correct.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
children = node.children
|
|
||||||
except AttributeError:
|
|
||||||
return
|
|
||||||
|
|
||||||
for child in children:
|
|
||||||
assert child.parent == node
|
|
||||||
_assert_valid_graph(child)
|
|
||||||
|
|
||||||
|
|
||||||
class Differ(object):
|
class Differ(object):
|
||||||
@@ -60,6 +51,8 @@ class Differ(object):
|
|||||||
|
|
||||||
self.lines = split_lines(code, keepends=True)
|
self.lines = split_lines(code, keepends=True)
|
||||||
self.module = parse(code, diff_cache=True, cache=True)
|
self.module = parse(code, diff_cache=True, cache=True)
|
||||||
|
assert code == self.module.get_code()
|
||||||
|
_assert_valid_graph(self.module)
|
||||||
return self.module
|
return self.module
|
||||||
|
|
||||||
def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
|
def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
|
||||||
@@ -73,11 +66,15 @@ class Differ(object):
|
|||||||
new_module = diff_parser.update(self.lines, lines)
|
new_module = diff_parser.update(self.lines, lines)
|
||||||
self.lines = lines
|
self.lines = lines
|
||||||
assert code == new_module.get_code()
|
assert code == new_module.get_code()
|
||||||
assert diff_parser._copy_count == copies
|
|
||||||
#assert diff_parser._parser_count == parsers
|
|
||||||
|
|
||||||
assert expect_error_leaves == _check_error_leaves_nodes(new_module)
|
|
||||||
_assert_valid_graph(new_module)
|
_assert_valid_graph(new_module)
|
||||||
|
|
||||||
|
error_node = _check_error_leaves_nodes(new_module)
|
||||||
|
assert expect_error_leaves == (error_node is not None), error_node
|
||||||
|
if parsers is not ANY:
|
||||||
|
assert diff_parser._parser_count == parsers
|
||||||
|
if copies is not ANY:
|
||||||
|
assert diff_parser._copy_count == copies
|
||||||
return new_module
|
return new_module
|
||||||
|
|
||||||
|
|
||||||
@@ -122,7 +119,7 @@ def test_positions(differ):
|
|||||||
|
|
||||||
m = differ.parse('a\n\n', parsers=1)
|
m = differ.parse('a\n\n', parsers=1)
|
||||||
assert m.end_pos == (3, 0)
|
assert m.end_pos == (3, 0)
|
||||||
m = differ.parse('a\n\n ', copies=1, parsers=1)
|
m = differ.parse('a\n\n ', copies=1, parsers=2)
|
||||||
assert m.end_pos == (3, 1)
|
assert m.end_pos == (3, 1)
|
||||||
m = differ.parse('a ', parsers=1)
|
m = differ.parse('a ', parsers=1)
|
||||||
assert m.end_pos == (1, 2)
|
assert m.end_pos == (1, 2)
|
||||||
@@ -138,7 +135,7 @@ def test_if_simple(differ):
|
|||||||
differ.initialize(src + 'a')
|
differ.initialize(src + 'a')
|
||||||
differ.parse(src + else_ + "a", copies=0, parsers=1)
|
differ.parse(src + else_ + "a", copies=0, parsers=1)
|
||||||
|
|
||||||
differ.parse(else_, parsers=1, expect_error_leaves=True)
|
differ.parse(else_, parsers=1, copies=1, expect_error_leaves=True)
|
||||||
differ.parse(src + else_, parsers=1)
|
differ.parse(src + else_, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
@@ -208,7 +205,7 @@ def test_open_parentheses(differ):
|
|||||||
differ.parse(new_code, parsers=1, expect_error_leaves=True)
|
differ.parse(new_code, parsers=1, expect_error_leaves=True)
|
||||||
|
|
||||||
new_code = 'a = 1\n' + new_code
|
new_code = 'a = 1\n' + new_code
|
||||||
differ.parse(new_code, copies=1, parsers=1, expect_error_leaves=True)
|
differ.parse(new_code, parsers=2, expect_error_leaves=True)
|
||||||
|
|
||||||
func += 'def other_func():\n pass\n'
|
func += 'def other_func():\n pass\n'
|
||||||
differ.initialize('isinstance(\n' + func)
|
differ.initialize('isinstance(\n' + func)
|
||||||
@@ -222,6 +219,7 @@ def test_open_parentheses_at_end(differ):
|
|||||||
differ.initialize(code)
|
differ.initialize(code)
|
||||||
differ.parse(code, parsers=1, expect_error_leaves=True)
|
differ.parse(code, parsers=1, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
def test_backslash(differ):
|
def test_backslash(differ):
|
||||||
src = dedent(r"""
|
src = dedent(r"""
|
||||||
a = 1\
|
a = 1\
|
||||||
@@ -255,7 +253,7 @@ def test_backslash(differ):
|
|||||||
def test_full_copy(differ):
|
def test_full_copy(differ):
|
||||||
code = 'def foo(bar, baz):\n pass\n bar'
|
code = 'def foo(bar, baz):\n pass\n bar'
|
||||||
differ.initialize(code)
|
differ.initialize(code)
|
||||||
differ.parse(code, copies=1, parsers=1)
|
differ.parse(code, copies=1)
|
||||||
|
|
||||||
|
|
||||||
def test_wrong_whitespace(differ):
|
def test_wrong_whitespace(differ):
|
||||||
@@ -263,10 +261,10 @@ def test_wrong_whitespace(differ):
|
|||||||
hello
|
hello
|
||||||
'''
|
'''
|
||||||
differ.initialize(code)
|
differ.initialize(code)
|
||||||
differ.parse(code + 'bar\n ', parsers=1)
|
differ.parse(code + 'bar\n ', parsers=3)
|
||||||
|
|
||||||
code += """abc(\npass\n """
|
code += """abc(\npass\n """
|
||||||
differ.parse(code, parsers=1, copies=1, expect_error_leaves=True)
|
differ.parse(code, parsers=2, copies=1, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
def test_issues_with_error_leaves(differ):
|
def test_issues_with_error_leaves(differ):
|
||||||
@@ -367,7 +365,7 @@ def test_totally_wrong_whitespace(differ):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=3, copies=0, expect_error_leaves=True)
|
differ.parse(code2, parsers=4, copies=0, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
def test_node_insertion(differ):
|
def test_node_insertion(differ):
|
||||||
@@ -466,6 +464,9 @@ def test_in_parentheses_newlines(differ):
|
|||||||
|
|
||||||
b = 2""")
|
b = 2""")
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
def test_indentation_issue(differ):
|
def test_indentation_issue(differ):
|
||||||
code1 = dedent("""
|
code1 = dedent("""
|
||||||
@@ -483,4 +484,803 @@ def test_indentation_issue(differ):
|
|||||||
""")
|
""")
|
||||||
|
|
||||||
differ.initialize(code1)
|
differ.initialize(code1)
|
||||||
differ.parse(code2, parsers=2)
|
differ.parse(code2, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_endmarker_newline(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
docu = None
|
||||||
|
# some comment
|
||||||
|
result = codet
|
||||||
|
incomplete_dctassign = {
|
||||||
|
"module"
|
||||||
|
|
||||||
|
if "a":
|
||||||
|
x = 3 # asdf
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = code1.replace('codet', 'coded')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_newlines_at_end(differ):
|
||||||
|
differ.initialize('a\n\n')
|
||||||
|
differ.parse('a\n', copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_end_newline_with_decorator(differ):
|
||||||
|
code = dedent('''\
|
||||||
|
@staticmethod
|
||||||
|
def spam():
|
||||||
|
import json
|
||||||
|
json.l''')
|
||||||
|
|
||||||
|
differ.initialize(code)
|
||||||
|
module = differ.parse(code + '\n', copies=1, parsers=1)
|
||||||
|
decorated, endmarker = module.children
|
||||||
|
assert decorated.type == 'decorated'
|
||||||
|
decorator, func = decorated.children
|
||||||
|
suite = func.children[-1]
|
||||||
|
assert suite.type == 'suite'
|
||||||
|
newline, first_stmt, second_stmt = suite.children
|
||||||
|
assert first_stmt.get_code() == ' import json\n'
|
||||||
|
assert second_stmt.get_code() == ' json.l\n'
|
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_to_valid_nodes(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
def a():
|
||||||
|
foo = 3
|
||||||
|
def b():
|
||||||
|
la = 3
|
||||||
|
else:
|
||||||
|
la
|
||||||
|
return
|
||||||
|
foo
|
||||||
|
base
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
def a():
|
||||||
|
foo = 3
|
||||||
|
def b():
|
||||||
|
la = 3
|
||||||
|
if foo:
|
||||||
|
latte = 3
|
||||||
|
else:
|
||||||
|
la
|
||||||
|
return
|
||||||
|
foo
|
||||||
|
base
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, copies=3)
|
||||||
|
|
||||||
|
|
||||||
|
def test_if_removal_and_reappearence(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
la = 3
|
||||||
|
if foo:
|
||||||
|
latte = 3
|
||||||
|
else:
|
||||||
|
la
|
||||||
|
pass
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
la = 3
|
||||||
|
latte = 3
|
||||||
|
else:
|
||||||
|
la
|
||||||
|
pass
|
||||||
|
''')
|
||||||
|
|
||||||
|
code3 = dedent('''\
|
||||||
|
la = 3
|
||||||
|
if foo:
|
||||||
|
latte = 3
|
||||||
|
else:
|
||||||
|
la
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, copies=4, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=1, copies=1)
|
||||||
|
differ.parse(code3, parsers=1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_error_indentation(differ):
|
||||||
|
code = 'if x:\n 1\n'
|
||||||
|
differ.initialize(code)
|
||||||
|
differ.parse(code + ' 2\n', parsers=1, copies=0, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_differing_docstrings(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
def foobar(x, y):
|
||||||
|
1
|
||||||
|
return x
|
||||||
|
|
||||||
|
def bazbiz():
|
||||||
|
foobar()
|
||||||
|
lala
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
def foobar(x, y):
|
||||||
|
2
|
||||||
|
return x + y
|
||||||
|
|
||||||
|
def bazbiz():
|
||||||
|
z = foobar()
|
||||||
|
lala
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=3, copies=1)
|
||||||
|
differ.parse(code1, parsers=3, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_one_call_in_function_change(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
def f(self):
|
||||||
|
mro = [self]
|
||||||
|
for a in something:
|
||||||
|
yield a
|
||||||
|
|
||||||
|
def g(self):
|
||||||
|
return C(
|
||||||
|
a=str,
|
||||||
|
b=self,
|
||||||
|
)
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
def f(self):
|
||||||
|
mro = [self]
|
||||||
|
|
||||||
|
def g(self):
|
||||||
|
return C(
|
||||||
|
a=str,
|
||||||
|
t
|
||||||
|
b=self,
|
||||||
|
)
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=2, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_function_deletion(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C(list):
|
||||||
|
def f(self):
|
||||||
|
def iterate():
|
||||||
|
for x in b:
|
||||||
|
break
|
||||||
|
|
||||||
|
return list(iterate())
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
class C():
|
||||||
|
def f(self):
|
||||||
|
for x in b:
|
||||||
|
break
|
||||||
|
|
||||||
|
return list(iterate())
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, copies=0, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=1, copies=0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_docstring_removal(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class E(Exception):
|
||||||
|
"""
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
"""
|
||||||
|
|
||||||
|
class S(object):
|
||||||
|
@property
|
||||||
|
def f(self):
|
||||||
|
return cmd
|
||||||
|
def __repr__(self):
|
||||||
|
return cmd2
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
class E(Exception):
|
||||||
|
"""
|
||||||
|
1
|
||||||
|
3
|
||||||
|
"""
|
||||||
|
|
||||||
|
class S(object):
|
||||||
|
@property
|
||||||
|
def f(self):
|
||||||
|
return cmd
|
||||||
|
return cmd2
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, copies=2)
|
||||||
|
differ.parse(code1, parsers=2, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_paren_in_strange_position(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
""" ha """
|
||||||
|
def __init__(self, message):
|
||||||
|
self.message = message
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
class C:
|
||||||
|
""" ha """
|
||||||
|
)
|
||||||
|
def __init__(self, message):
|
||||||
|
self.message = message
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, copies=2, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=0, copies=2)
|
||||||
|
|
||||||
|
|
||||||
|
def insert_line_into_code(code, index, line):
    """
    Return ``code`` with ``line`` inserted before the line at ``index``.

    ``code`` is split with line endings kept, so ``line`` has to carry its
    own line ending if one is wanted.
    """
    pieces = split_lines(code, keepends=True)
    # Slice assignment on an empty slice is the documented equivalent of
    # list.insert(index, line).
    pieces[index:index] = [line]
    return ''.join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def test_paren_before_docstring(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
# comment
|
||||||
|
"""
|
||||||
|
The
|
||||||
|
"""
|
||||||
|
from parso import tree
|
||||||
|
from parso import python
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = insert_line_into_code(code1, 1, ' ' * 16 + 'raise InternalParseError(\n')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=2, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_parentheses_before_method(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class A:
|
||||||
|
def a(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class B:
|
||||||
|
def b(self):
|
||||||
|
if 1:
|
||||||
|
pass
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
class A:
|
||||||
|
def a(self):
|
||||||
|
pass
|
||||||
|
Exception.__init__(self, "x" %
|
||||||
|
|
||||||
|
def b(self):
|
||||||
|
if 1:
|
||||||
|
pass
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_indentation_issues(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f():
|
||||||
|
1
|
||||||
|
if 2:
|
||||||
|
return 3
|
||||||
|
|
||||||
|
def g():
|
||||||
|
to_be_removed
|
||||||
|
pass
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f():
|
||||||
|
1
|
||||||
|
``something``, very ``weird``).
|
||||||
|
if 2:
|
||||||
|
return 3
|
||||||
|
|
||||||
|
def g():
|
||||||
|
to_be_removed
|
||||||
|
pass
|
||||||
|
''')
|
||||||
|
|
||||||
|
code3 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f():
|
||||||
|
1
|
||||||
|
if 2:
|
||||||
|
return 3
|
||||||
|
|
||||||
|
def g():
|
||||||
|
pass
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=2)
|
||||||
|
differ.parse(code3, parsers=2, copies=1)
|
||||||
|
differ.parse(code1, parsers=1, copies=2)
|
||||||
|
|
||||||
|
|
||||||
|
def test_error_dedent_issues(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
1
|
||||||
|
except KeyError:
|
||||||
|
if 2:
|
||||||
|
3
|
||||||
|
except IndexError:
|
||||||
|
4
|
||||||
|
|
||||||
|
5
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
except KeyError:
|
||||||
|
1
|
||||||
|
except KeyError:
|
||||||
|
if 2:
|
||||||
|
3
|
||||||
|
except IndexError:
|
||||||
|
4
|
||||||
|
|
||||||
|
something_inserted
|
||||||
|
5
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=6, copies=2, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=1, copies=0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_random_text_insertion(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f():
|
||||||
|
return node
|
||||||
|
|
||||||
|
def g():
|
||||||
|
try:
|
||||||
|
1
|
||||||
|
except KeyError:
|
||||||
|
2
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f():
|
||||||
|
return node
|
||||||
|
Some'random text: yeah
|
||||||
|
for push in plan.dfa_pushes:
|
||||||
|
|
||||||
|
def g():
|
||||||
|
try:
|
||||||
|
1
|
||||||
|
except KeyError:
|
||||||
|
2
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_many_nested_ifs(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f(self):
|
||||||
|
def iterate():
|
||||||
|
if 1:
|
||||||
|
yield t
|
||||||
|
else:
|
||||||
|
yield
|
||||||
|
return
|
||||||
|
|
||||||
|
def g():
|
||||||
|
3
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = dedent('''\
|
||||||
|
def f(self):
|
||||||
|
def iterate():
|
||||||
|
if 1:
|
||||||
|
yield t
|
||||||
|
hahahaha
|
||||||
|
if 2:
|
||||||
|
else:
|
||||||
|
yield
|
||||||
|
return
|
||||||
|
|
||||||
|
def g():
|
||||||
|
3
|
||||||
|
''')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=2, copies=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5")
|
||||||
|
@pytest.mark.parametrize('prefix', ['', 'async '])
|
||||||
|
def test_with_and_funcdef_in_call(differ, prefix):
|
||||||
|
code1 = prefix + dedent('''\
|
||||||
|
with x:
|
||||||
|
la = C(
|
||||||
|
a=1,
|
||||||
|
b=2,
|
||||||
|
c=3,
|
||||||
|
)
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = insert_line_into_code(code1, 3, 'def y(self, args):\n')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=3, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_wrong_backslash(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
def y():
|
||||||
|
1
|
||||||
|
for x in y:
|
||||||
|
continue
|
||||||
|
''')
|
||||||
|
|
||||||
|
code2 = insert_line_into_code(code1, 3, '\\.whl$\n')
|
||||||
|
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_comment_change(differ):
|
||||||
|
differ.initialize('')
|
||||||
|
|
||||||
|
|
||||||
|
def test_random_unicode_characters(differ):
|
||||||
|
"""
|
||||||
|
Those issues were all found with the fuzzer.
|
||||||
|
"""
|
||||||
|
differ.initialize('')
|
||||||
|
differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(u'\r\r', parsers=1)
|
||||||
|
differ.parse(u"˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1)
|
||||||
|
s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):'
|
||||||
|
differ.parse(s, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse('')
|
||||||
|
differ.parse(s + '\n', parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(u' result = (\r\f\x17\t\x11res)', parsers=2, expect_error_leaves=True)
|
||||||
|
differ.parse('')
|
||||||
|
differ.parse(' a( # xx\ndef', parsers=2, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.version_info < (2, 7), reason="No set literals in Python 2.6")
|
||||||
|
def test_dedent_end_positions(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
if 1:
|
||||||
|
if b:
|
||||||
|
2
|
||||||
|
c = {
|
||||||
|
5}
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
if 1:
|
||||||
|
if ⌟ഒᜈྡྷṭb:
|
||||||
|
2
|
||||||
|
'l': ''}
|
||||||
|
c = {
|
||||||
|
5}
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_special_no_newline_ending(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
1
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
1
|
||||||
|
is ''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=1, parsers=0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_random_character_insertion(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
def create(self):
|
||||||
|
1
|
||||||
|
if self.path is not None:
|
||||||
|
return
|
||||||
|
# 3
|
||||||
|
# 4
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
def create(self):
|
||||||
|
1
|
||||||
|
if 2:
|
||||||
|
x return
|
||||||
|
# 3
|
||||||
|
# 4
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=3, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=1, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_import_opening_bracket(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
1
|
||||||
|
2
|
||||||
|
from bubu import (X,
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
11
|
||||||
|
2
|
||||||
|
from bubu import (X,
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=1, parsers=2, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_opening_bracket_at_end(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
1
|
||||||
|
[
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
3
|
||||||
|
class C:
|
||||||
|
1
|
||||||
|
[
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_all_sorts_of_indentation(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
1
|
||||||
|
def f():
|
||||||
|
'same'
|
||||||
|
|
||||||
|
if foo:
|
||||||
|
a = b
|
||||||
|
end
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
class C:
|
||||||
|
1
|
||||||
|
def f(yield await %|(
|
||||||
|
'same'
|
||||||
|
|
||||||
|
\x02\x06\x0f\x1c\x11
|
||||||
|
if foo:
|
||||||
|
a = b
|
||||||
|
|
||||||
|
end
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=4, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=1, parsers=3)
|
||||||
|
|
||||||
|
code3 = dedent('''\
|
||||||
|
if 1:
|
||||||
|
a
|
||||||
|
b
|
||||||
|
c
|
||||||
|
d
|
||||||
|
\x00
|
||||||
|
''')
|
||||||
|
differ.parse(code3, parsers=2, expect_error_leaves=True)
|
||||||
|
differ.parse('')
|
||||||
|
|
||||||
|
|
||||||
|
def test_dont_copy_dedents_in_beginning(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
a
|
||||||
|
4
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
4
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=2)
|
||||||
|
|
||||||
|
|
||||||
|
def test_dont_copy_error_leaves(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
def f(n):
|
||||||
|
x
|
||||||
|
if 2:
|
||||||
|
3
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
def f(n):
|
||||||
|
def if 1:
|
||||||
|
indent
|
||||||
|
x
|
||||||
|
if 2:
|
||||||
|
3
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, parsers=2)
|
||||||
|
|
||||||
|
|
||||||
|
def test_error_dedent_in_between(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f():
|
||||||
|
a
|
||||||
|
if something:
|
||||||
|
x
|
||||||
|
z
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f():
|
||||||
|
a
|
||||||
|
dedent
|
||||||
|
if other_thing:
|
||||||
|
b
|
||||||
|
if something:
|
||||||
|
x
|
||||||
|
z
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=1, parsers=2)
|
||||||
|
|
||||||
|
|
||||||
|
def test_some_other_indentation_issues(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
x
|
||||||
|
def f():
|
||||||
|
""
|
||||||
|
copied
|
||||||
|
a
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
try:
|
||||||
|
de
|
||||||
|
a
|
||||||
|
b
|
||||||
|
c
|
||||||
|
d
|
||||||
|
def f():
|
||||||
|
""
|
||||||
|
copied
|
||||||
|
a
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=2, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=2, parsers=2)
|
||||||
|
|
||||||
|
|
||||||
|
def test_open_bracket_case1(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
1
|
||||||
|
2 # ha
|
||||||
|
''')
|
||||||
|
code2 = insert_line_into_code(code1, 2, ' [str\n')
|
||||||
|
code3 = insert_line_into_code(code2, 4, ' str\n')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code3, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=1, parsers=1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_open_bracket_case2(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f(self):
|
||||||
|
(
|
||||||
|
b
|
||||||
|
c
|
||||||
|
|
||||||
|
def g(self):
|
||||||
|
d
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
class C:
|
||||||
|
def f(self):
|
||||||
|
(
|
||||||
|
b
|
||||||
|
c
|
||||||
|
self.
|
||||||
|
|
||||||
|
def g(self):
|
||||||
|
d
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=2, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=2, parsers=0, expect_error_leaves=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_some_weird_removals(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
class C:
|
||||||
|
1
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
class C:
|
||||||
|
1
|
||||||
|
@property
|
||||||
|
A
|
||||||
|
return
|
||||||
|
# x
|
||||||
|
omega
|
||||||
|
''')
|
||||||
|
code3 = dedent('''\
|
||||||
|
class C:
|
||||||
|
1
|
||||||
|
;
|
||||||
|
omega
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
|
differ.parse(code3, copies=1, parsers=2, expect_error_leaves=True)
|
||||||
|
differ.parse(code1, copies=1)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5")
|
||||||
|
def test_async_copy(differ):
|
||||||
|
code1 = dedent('''\
|
||||||
|
async def main():
|
||||||
|
x = 3
|
||||||
|
print(
|
||||||
|
''')
|
||||||
|
code2 = dedent('''\
|
||||||
|
async def main():
|
||||||
|
x = 3
|
||||||
|
print()
|
||||||
|
''')
|
||||||
|
differ.initialize(code1)
|
||||||
|
differ.parse(code2, copies=1, parsers=1)
|
||||||
|
differ.parse(code1, copies=1, parsers=1, expect_error_leaves=True)
|
||||||
|
|||||||
85
test/test_error_recovery.py
Normal file
85
test/test_error_recovery.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
from parso import parse, load_grammar
|
||||||
|
|
||||||
|
|
||||||
|
def test_with_stmt():
|
||||||
|
module = parse('with x: f.\na')
|
||||||
|
assert module.children[0].type == 'with_stmt'
|
||||||
|
w, with_item, colon, f = module.children[0].children
|
||||||
|
assert f.type == 'error_node'
|
||||||
|
assert f.get_code(include_prefix=False) == 'f.'
|
||||||
|
|
||||||
|
assert module.children[2].type == 'name'
|
||||||
|
|
||||||
|
|
||||||
|
def test_one_line_function(each_version):
|
||||||
|
module = parse('def x(): f.', version=each_version)
|
||||||
|
assert module.children[0].type == 'funcdef'
|
||||||
|
def_, name, parameters, colon, f = module.children[0].children
|
||||||
|
assert f.type == 'error_node'
|
||||||
|
|
||||||
|
module = parse('def x(a:', version=each_version)
|
||||||
|
func = module.children[0]
|
||||||
|
assert func.type == 'error_node'
|
||||||
|
if each_version.startswith('2'):
|
||||||
|
assert func.children[-1].value == 'a'
|
||||||
|
else:
|
||||||
|
assert func.children[-1] == ':'
|
||||||
|
|
||||||
|
|
||||||
|
def test_if_else():
|
||||||
|
module = parse('if x:\n f.\nelse:\n g(')
|
||||||
|
if_stmt = module.children[0]
|
||||||
|
if_, test, colon, suite1, else_, colon, suite2 = if_stmt.children
|
||||||
|
f = suite1.children[1]
|
||||||
|
assert f.type == 'error_node'
|
||||||
|
assert f.children[0].value == 'f'
|
||||||
|
assert f.children[1].value == '.'
|
||||||
|
g = suite2.children[1]
|
||||||
|
assert g.children[0].value == 'g'
|
||||||
|
assert g.children[1].value == '('
|
||||||
|
|
||||||
|
|
||||||
|
def test_if_stmt():
|
||||||
|
module = parse('if x: f.\nelse: g(')
|
||||||
|
if_stmt = module.children[0]
|
||||||
|
assert if_stmt.type == 'if_stmt'
|
||||||
|
if_, test, colon, f = if_stmt.children
|
||||||
|
assert f.type == 'error_node'
|
||||||
|
assert f.children[0].value == 'f'
|
||||||
|
assert f.children[1].value == '.'
|
||||||
|
|
||||||
|
assert module.children[1].type == 'newline'
|
||||||
|
assert module.children[1].value == '\n'
|
||||||
|
assert module.children[2].type == 'error_leaf'
|
||||||
|
assert module.children[2].value == 'else'
|
||||||
|
assert module.children[3].type == 'error_leaf'
|
||||||
|
assert module.children[3].value == ':'
|
||||||
|
|
||||||
|
in_else_stmt = module.children[4]
|
||||||
|
assert in_else_stmt.type == 'error_node'
|
||||||
|
assert in_else_stmt.children[0].value == 'g'
|
||||||
|
assert in_else_stmt.children[1].value == '('
|
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_token():
|
||||||
|
module = parse('a + ? + b')
|
||||||
|
error_node, q, plus_b, endmarker = module.children
|
||||||
|
assert error_node.get_code() == 'a +'
|
||||||
|
assert q.value == '?'
|
||||||
|
assert q.type == 'error_leaf'
|
||||||
|
assert plus_b.type == 'factor'
|
||||||
|
assert plus_b.get_code() == ' + b'
|
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_token_in_fstr():
|
||||||
|
module = load_grammar(version='3.6').parse('f"{a + ? + b}"')
|
||||||
|
error_node, q, plus_b, error1, error2, endmarker = module.children
|
||||||
|
assert error_node.get_code() == 'f"{a +'
|
||||||
|
assert q.value == '?'
|
||||||
|
assert q.type == 'error_leaf'
|
||||||
|
assert plus_b.type == 'error_node'
|
||||||
|
assert plus_b.get_code() == ' + b'
|
||||||
|
assert error1.value == '}'
|
||||||
|
assert error1.type == 'error_leaf'
|
||||||
|
assert error2.value == '"'
|
||||||
|
assert error2.type == 'error_leaf'
|
||||||
@@ -1,17 +1,19 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
from textwrap import dedent
|
||||||
|
|
||||||
from parso import load_grammar, ParserSyntaxError
|
from parso import load_grammar, ParserSyntaxError
|
||||||
from parso.python.fstring import tokenize
|
from parso.python.tokenize import tokenize
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def grammar():
|
def grammar():
|
||||||
return load_grammar(language="python-f-string")
|
return load_grammar(version='3.6')
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
'code', [
|
'code', [
|
||||||
'{1}',
|
'{1}',
|
||||||
|
'{1:}',
|
||||||
'',
|
'',
|
||||||
'{1!a}',
|
'{1!a}',
|
||||||
'{1!a:1}',
|
'{1!a:1}',
|
||||||
@@ -26,22 +28,12 @@ def grammar():
|
|||||||
'{{{1}',
|
'{{{1}',
|
||||||
'1{{2{{3',
|
'1{{2{{3',
|
||||||
'}}',
|
'}}',
|
||||||
'{:}}}',
|
|
||||||
|
|
||||||
# Invalid, but will be checked, later.
|
|
||||||
'{}',
|
|
||||||
'{1:}',
|
|
||||||
'{:}',
|
|
||||||
'{:1}',
|
|
||||||
'{!:}',
|
|
||||||
'{!}',
|
|
||||||
'{!a}',
|
|
||||||
'{1:{}}',
|
|
||||||
'{1:{:}}',
|
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
def test_valid(code, grammar):
|
def test_valid(code, grammar):
|
||||||
fstring = grammar.parse(code, error_recovery=False)
|
code = 'f"""%s"""' % code
|
||||||
|
module = grammar.parse(code, error_recovery=False)
|
||||||
|
fstring = module.children[0]
|
||||||
assert fstring.type == 'fstring'
|
assert fstring.type == 'fstring'
|
||||||
assert fstring.get_code() == code
|
assert fstring.get_code() == code
|
||||||
|
|
||||||
@@ -52,24 +44,46 @@ def test_valid(code, grammar):
|
|||||||
'{',
|
'{',
|
||||||
'{1!{a}}',
|
'{1!{a}}',
|
||||||
'{!{a}}',
|
'{!{a}}',
|
||||||
|
'{}',
|
||||||
|
'{:}',
|
||||||
|
'{:}}}',
|
||||||
|
'{:1}',
|
||||||
|
'{!:}',
|
||||||
|
'{!}',
|
||||||
|
'{!a}',
|
||||||
|
'{1:{}}',
|
||||||
|
'{1:{:}}',
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
def test_invalid(code, grammar):
|
def test_invalid(code, grammar):
|
||||||
|
code = 'f"""%s"""' % code
|
||||||
with pytest.raises(ParserSyntaxError):
|
with pytest.raises(ParserSyntaxError):
|
||||||
grammar.parse(code, error_recovery=False)
|
grammar.parse(code, error_recovery=False)
|
||||||
|
|
||||||
# It should work with error recovery.
|
# It should work with error recovery.
|
||||||
#grammar.parse(code, error_recovery=True)
|
grammar.parse(code, error_recovery=True)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
('code', 'start_pos', 'positions'), [
|
('code', 'positions'), [
|
||||||
# 2 times 2, 5 because python expr and endmarker.
|
# 2 times 2, 5 because python expr and endmarker.
|
||||||
('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
|
('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]),
|
||||||
(' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
|
('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9),
|
||||||
('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
|
(1, 10), (1, 11), (1, 12), (1, 13)]),
|
||||||
|
('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1),
|
||||||
|
(4, 2), (4, 5)]),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
def test_tokenize_start_pos(code, start_pos, positions):
|
def test_tokenize_start_pos(code, positions):
|
||||||
tokens = tokenize(code, start_pos)
|
tokens = list(tokenize(code, version_info=(3, 6)))
|
||||||
assert positions == [p.start_pos for p in tokens]
|
assert positions == [p.start_pos for p in tokens]
|
||||||
|
|
||||||
|
|
||||||
|
def test_roundtrip(grammar):
|
||||||
|
code = dedent("""\
|
||||||
|
f'''s{
|
||||||
|
str.uppe
|
||||||
|
'''
|
||||||
|
""")
|
||||||
|
tree = grammar.parse(code)
|
||||||
|
assert tree.get_code() == code
|
||||||
|
|||||||
@@ -106,14 +106,15 @@ def test_end_newlines():
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(('code', 'types'), [
|
@pytest.mark.parametrize(('code', 'types'), [
|
||||||
('\r', ['error_leaf', 'endmarker']),
|
('\r', ['endmarker']),
|
||||||
('\n\r', ['error_leaf', 'endmarker'])
|
('\n\r', ['endmarker'])
|
||||||
])
|
])
|
||||||
def test_carriage_return_at_end(code, types):
|
def test_carriage_return_at_end(code, types):
|
||||||
"""
|
"""
|
||||||
By adding an artificial newline this creates weird side effects for
|
By adding an artificial newline this created weird side effects for
|
||||||
\r at the end of files that would normally be error leafs.
|
\r at the end of files.
|
||||||
"""
|
"""
|
||||||
tree = parse(code)
|
tree = parse(code)
|
||||||
assert tree.get_code() == code
|
assert tree.get_code() == code
|
||||||
assert [c.type for c in tree.children] == types
|
assert [c.type for c in tree.children] == types
|
||||||
|
assert tree.end_pos == (len(code) + 1, 0)
|
||||||
|
|||||||
@@ -32,3 +32,16 @@ def test_split_params_with_stars():
|
|||||||
assert_params(u'x, *args', x=None, args=None)
|
assert_params(u'x, *args', x=None, args=None)
|
||||||
assert_params(u'**kwargs', kwargs=None)
|
assert_params(u'**kwargs', kwargs=None)
|
||||||
assert_params(u'*args, **kwargs', args=None, kwargs=None)
|
assert_params(u'*args, **kwargs', args=None, kwargs=None)
|
||||||
|
|
||||||
|
|
||||||
|
def test_kw_only_no_kw(works_ge_py3):
|
||||||
|
"""
|
||||||
|
Parsing this should be working. In CPython the parser also parses this and
|
||||||
|
in a later step the AST complains.
|
||||||
|
"""
|
||||||
|
module = works_ge_py3.parse('def test(arg, *):\n pass')
|
||||||
|
if module is not None:
|
||||||
|
func = module.children[0]
|
||||||
|
open_, p1, asterisk, close = func._get_param_nodes()
|
||||||
|
assert p1.get_code('arg,')
|
||||||
|
assert asterisk.value == '*'
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ import pytest
|
|||||||
|
|
||||||
from parso import load_grammar
|
from parso import load_grammar
|
||||||
from parso import ParserSyntaxError
|
from parso import ParserSyntaxError
|
||||||
|
from parso.pgen2 import generate_grammar
|
||||||
|
from parso.python import tokenize
|
||||||
|
|
||||||
|
|
||||||
def _parse(code, version=None):
|
def _parse(code, version=None):
|
||||||
@@ -270,3 +272,19 @@ def py_br(each_version):
|
|||||||
def test_py3_rb(works_ge_py3):
|
def test_py3_rb(works_ge_py3):
|
||||||
works_ge_py3.parse("rb'1'")
|
works_ge_py3.parse("rb'1'")
|
||||||
works_ge_py3.parse("RB'1'")
|
works_ge_py3.parse("RB'1'")
|
||||||
|
|
||||||
|
|
||||||
|
def test_left_recursion():
|
||||||
|
with pytest.raises(ValueError, match='left recursion'):
|
||||||
|
generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes)
|
||||||
|
|
||||||
|
|
||||||
|
def test_ambiguities():
|
||||||
|
with pytest.raises(ValueError, match='ambiguous'):
|
||||||
|
generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match='ambiguous'):
|
||||||
|
generate_grammar('''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', tokenize.PythonTokenTypes)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError, match='ambiguous'):
|
||||||
|
generate_grammar('''foo: bar | 'x'\nbar: 'x'\n''', tokenize.PythonTokenTypes)
|
||||||
|
|||||||
@@ -114,6 +114,22 @@ def _get_actual_exception(code):
|
|||||||
# Python 3.4/3.4 have a bit of a different warning than 3.5/3.6 in
|
# Python 3.4/3.4 have a bit of a different warning than 3.5/3.6 in
|
||||||
# certain places. But in others this error makes sense.
|
# certain places. But in others this error makes sense.
|
||||||
return [wanted, "SyntaxError: can't use starred expression here"], line_nr
|
return [wanted, "SyntaxError: can't use starred expression here"], line_nr
|
||||||
|
elif wanted == 'SyntaxError: f-string: unterminated string':
|
||||||
|
wanted = 'SyntaxError: EOL while scanning string literal'
|
||||||
|
elif wanted == 'SyntaxError: f-string expression part cannot include a backslash':
|
||||||
|
return [
|
||||||
|
wanted,
|
||||||
|
"SyntaxError: EOL while scanning string literal",
|
||||||
|
"SyntaxError: unexpected character after line continuation character",
|
||||||
|
], line_nr
|
||||||
|
elif wanted == "SyntaxError: f-string: expecting '}'":
|
||||||
|
wanted = 'SyntaxError: EOL while scanning string literal'
|
||||||
|
elif wanted == 'SyntaxError: f-string: empty expression not allowed':
|
||||||
|
wanted = 'SyntaxError: invalid syntax'
|
||||||
|
elif wanted == "SyntaxError: f-string expression part cannot include '#'":
|
||||||
|
wanted = 'SyntaxError: invalid syntax'
|
||||||
|
elif wanted == "SyntaxError: f-string: single '}' is not allowed":
|
||||||
|
wanted = 'SyntaxError: invalid syntax'
|
||||||
return [wanted], line_nr
|
return [wanted], line_nr
|
||||||
|
|
||||||
|
|
||||||
@@ -242,6 +258,11 @@ def test_too_many_levels_of_indentation():
|
|||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
'code', [
|
'code', [
|
||||||
"f'{*args,}'",
|
"f'{*args,}'",
|
||||||
|
r'f"\""',
|
||||||
|
r'f"\\\""',
|
||||||
|
r'fr"\""',
|
||||||
|
r'fr"\\\""',
|
||||||
|
r"print(f'Some {x:.2f} and some {y}')",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
def test_valid_fstrings(code):
|
def test_valid_fstrings(code):
|
||||||
@@ -251,6 +272,8 @@ def test_valid_fstrings(code):
|
|||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
('code', 'message'), [
|
('code', 'message'), [
|
||||||
("f'{1+}'", ('invalid syntax')),
|
("f'{1+}'", ('invalid syntax')),
|
||||||
|
(r'f"\"', ('invalid syntax')),
|
||||||
|
(r'fr"\"', ('invalid syntax')),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
def test_invalid_fstrings(code, message):
|
def test_invalid_fstrings(code, message):
|
||||||
|
|||||||
@@ -6,16 +6,30 @@ import pytest
|
|||||||
|
|
||||||
from parso._compatibility import py_version
|
from parso._compatibility import py_version
|
||||||
from parso.utils import split_lines, parse_version_string
|
from parso.utils import split_lines, parse_version_string
|
||||||
from parso.python.token import (
|
from parso.python.token import PythonTokenTypes
|
||||||
NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
|
|
||||||
from parso.python import tokenize
|
from parso.python import tokenize
|
||||||
from parso import parse
|
from parso import parse
|
||||||
from parso.python.tokenize import PythonToken
|
from parso.python.tokenize import PythonToken
|
||||||
|
|
||||||
|
|
||||||
def _get_token_list(string):
|
# To make it easier to access some of the token types, just put them here.
|
||||||
|
NAME = PythonTokenTypes.NAME
|
||||||
|
NEWLINE = PythonTokenTypes.NEWLINE
|
||||||
|
STRING = PythonTokenTypes.STRING
|
||||||
|
INDENT = PythonTokenTypes.INDENT
|
||||||
|
DEDENT = PythonTokenTypes.DEDENT
|
||||||
|
ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
|
||||||
|
OP = PythonTokenTypes.OP
|
||||||
|
ENDMARKER = PythonTokenTypes.ENDMARKER
|
||||||
|
ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT
|
||||||
|
FSTRING_START = PythonTokenTypes.FSTRING_START
|
||||||
|
FSTRING_STRING = PythonTokenTypes.FSTRING_STRING
|
||||||
|
FSTRING_END = PythonTokenTypes.FSTRING_END
|
||||||
|
|
||||||
|
|
||||||
|
def _get_token_list(string, version=None):
|
||||||
# Load the current version.
|
# Load the current version.
|
||||||
version_info = parse_version_string()
|
version_info = parse_version_string(version)
|
||||||
return list(tokenize.tokenize(string, version_info))
|
return list(tokenize.tokenize(string, version_info))
|
||||||
|
|
||||||
|
|
||||||
@@ -126,7 +140,7 @@ def test_identifier_contains_unicode():
|
|||||||
else:
|
else:
|
||||||
# Unicode tokens in Python 2 seem to be identified as operators.
|
# Unicode tokens in Python 2 seem to be identified as operators.
|
||||||
# They will be ignored in the parser, that's ok.
|
# They will be ignored in the parser, that's ok.
|
||||||
assert unicode_token[0] == tokenize.ERRORTOKEN
|
assert unicode_token[0] == OP
|
||||||
|
|
||||||
|
|
||||||
def test_quoted_strings():
|
def test_quoted_strings():
|
||||||
@@ -162,6 +176,7 @@ def test_ur_literals():
|
|||||||
token_list = _get_token_list(literal)
|
token_list = _get_token_list(literal)
|
||||||
typ, result_literal, _, _ = token_list[0]
|
typ, result_literal, _, _ = token_list[0]
|
||||||
if is_literal:
|
if is_literal:
|
||||||
|
if typ != FSTRING_START:
|
||||||
assert typ == STRING
|
assert typ == STRING
|
||||||
assert result_literal == literal
|
assert result_literal == literal
|
||||||
else:
|
else:
|
||||||
@@ -175,6 +190,7 @@ def test_ur_literals():
|
|||||||
# Starting with Python 3.3 this ordering is also possible.
|
# Starting with Python 3.3 this ordering is also possible.
|
||||||
if py_version >= 33:
|
if py_version >= 33:
|
||||||
check('Rb""')
|
check('Rb""')
|
||||||
|
|
||||||
# Starting with Python 3.6 format strings where introduced.
|
# Starting with Python 3.6 format strings where introduced.
|
||||||
check('fr""', is_literal=py_version >= 36)
|
check('fr""', is_literal=py_version >= 36)
|
||||||
check('rF""', is_literal=py_version >= 36)
|
check('rF""', is_literal=py_version >= 36)
|
||||||
@@ -183,18 +199,18 @@ def test_ur_literals():
|
|||||||
|
|
||||||
|
|
||||||
def test_error_literal():
|
def test_error_literal():
|
||||||
error_token, endmarker = _get_token_list('"\n')
|
error_token, newline, endmarker = _get_token_list('"\n')
|
||||||
assert error_token.type == tokenize.ERRORTOKEN
|
assert error_token.type == ERRORTOKEN
|
||||||
assert endmarker.prefix == ''
|
assert error_token.string == '"'
|
||||||
assert error_token.string == '"\n'
|
assert newline.type == NEWLINE
|
||||||
assert endmarker.type == tokenize.ENDMARKER
|
assert endmarker.type == ENDMARKER
|
||||||
assert endmarker.prefix == ''
|
assert endmarker.prefix == ''
|
||||||
|
|
||||||
bracket, error_token, endmarker = _get_token_list('( """')
|
bracket, error_token, endmarker = _get_token_list('( """')
|
||||||
assert error_token.type == tokenize.ERRORTOKEN
|
assert error_token.type == ERRORTOKEN
|
||||||
assert error_token.prefix == ' '
|
assert error_token.prefix == ' '
|
||||||
assert error_token.string == '"""'
|
assert error_token.string == '"""'
|
||||||
assert endmarker.type == tokenize.ENDMARKER
|
assert endmarker.type == ENDMARKER
|
||||||
assert endmarker.prefix == ''
|
assert endmarker.prefix == ''
|
||||||
|
|
||||||
|
|
||||||
@@ -224,3 +240,105 @@ def test_endmarker_end_pos():
|
|||||||
def test_indentation(code, types):
|
def test_indentation(code, types):
|
||||||
actual_types = [t.type for t in _get_token_list(code)]
|
actual_types = [t.type for t in _get_token_list(code)]
|
||||||
assert actual_types == types + [ENDMARKER]
|
assert actual_types == types + [ENDMARKER]
|
||||||
|
|
||||||
|
|
||||||
|
def test_error_string():
|
||||||
|
t1, newline, endmarker = _get_token_list(' "\n')
|
||||||
|
assert t1.type == ERRORTOKEN
|
||||||
|
assert t1.prefix == ' '
|
||||||
|
assert t1.string == '"'
|
||||||
|
assert newline.type == NEWLINE
|
||||||
|
assert endmarker.prefix == ''
|
||||||
|
assert endmarker.string == ''
|
||||||
|
|
||||||
|
|
||||||
|
def test_indent_error_recovery():
|
||||||
|
code = dedent("""\
|
||||||
|
str(
|
||||||
|
from x import a
|
||||||
|
def
|
||||||
|
""")
|
||||||
|
lst = _get_token_list(code)
|
||||||
|
expected = [
|
||||||
|
# `str(`
|
||||||
|
INDENT, NAME, OP,
|
||||||
|
# `from parso`
|
||||||
|
NAME, NAME,
|
||||||
|
# `import a` on same line as the previous from parso
|
||||||
|
NAME, NAME, NEWLINE,
|
||||||
|
# Dedent happens, because there's an import now and the import
|
||||||
|
# statement "breaks" out of the opening paren on the first line.
|
||||||
|
DEDENT,
|
||||||
|
# `b`
|
||||||
|
NAME, NEWLINE, ENDMARKER]
|
||||||
|
assert [t.type for t in lst] == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_error_token_after_dedent():
|
||||||
|
code = dedent("""\
|
||||||
|
class C:
|
||||||
|
pass
|
||||||
|
$foo
|
||||||
|
""")
|
||||||
|
lst = _get_token_list(code)
|
||||||
|
expected = [
|
||||||
|
NAME, NAME, OP, NEWLINE, INDENT, NAME, NEWLINE, DEDENT,
|
||||||
|
# $foo\n
|
||||||
|
ERRORTOKEN, NAME, NEWLINE, ENDMARKER
|
||||||
|
]
|
||||||
|
assert [t.type for t in lst] == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_brackets_no_indentation():
|
||||||
|
"""
|
||||||
|
There used to be an issue that the parentheses counting would go below
|
||||||
|
zero. This should not happen.
|
||||||
|
"""
|
||||||
|
code = dedent("""\
|
||||||
|
}
|
||||||
|
{
|
||||||
|
}
|
||||||
|
""")
|
||||||
|
lst = _get_token_list(code)
|
||||||
|
assert [t.type for t in lst] == [OP, NEWLINE, OP, OP, NEWLINE, ENDMARKER]
|
||||||
|
|
||||||
|
|
||||||
|
def test_form_feed():
|
||||||
|
error_token, endmarker = _get_token_list(dedent('''\
|
||||||
|
\f"""'''))
|
||||||
|
assert error_token.prefix == '\f'
|
||||||
|
assert error_token.string == '"""'
|
||||||
|
assert endmarker.prefix == ''
|
||||||
|
|
||||||
|
|
||||||
|
def test_carriage_return():
|
||||||
|
lst = _get_token_list(' =\\\rclass')
|
||||||
|
assert [t.type for t in lst] == [INDENT, OP, DEDENT, NAME, ENDMARKER]
|
||||||
|
|
||||||
|
|
||||||
|
def test_backslash():
|
||||||
|
code = '\\\n# 1 \n'
|
||||||
|
endmarker, = _get_token_list(code)
|
||||||
|
assert endmarker.prefix == code
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
('code', 'types'), [
|
||||||
|
('f"', [FSTRING_START]),
|
||||||
|
('f""', [FSTRING_START, FSTRING_END]),
|
||||||
|
('f" {}"', [FSTRING_START, FSTRING_STRING, OP, OP, FSTRING_END]),
|
||||||
|
('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
|
||||||
|
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
|
||||||
|
(r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
|
||||||
|
(r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
|
||||||
|
FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
|
||||||
|
(r'print(f"Some {x:.2f}a{y}")', [
|
||||||
|
NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
|
||||||
|
FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
|
||||||
|
]),
|
||||||
|
|
||||||
|
]
|
||||||
|
)
|
||||||
|
def test_fstring(code, types, version_ge_py36):
|
||||||
|
actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
|
||||||
|
assert types + [ENDMARKER] == actual_types
|
||||||
|
|||||||
@@ -3,21 +3,42 @@ from codecs import BOM_UTF8
|
|||||||
from parso.utils import split_lines, python_bytes_to_unicode
|
from parso.utils import split_lines, python_bytes_to_unicode
|
||||||
import parso
|
import parso
|
||||||
|
|
||||||
|
import pytest
|
||||||
def test_split_lines_no_keepends():
|
|
||||||
assert split_lines('asd\r\n') == ['asd', '']
|
|
||||||
assert split_lines('asd\r\n\f') == ['asd', '\f']
|
|
||||||
assert split_lines('\fasd\r\n') == ['\fasd', '']
|
|
||||||
assert split_lines('') == ['']
|
|
||||||
assert split_lines('\n') == ['', '']
|
|
||||||
|
|
||||||
|
|
||||||
def test_split_lines_keepends():
|
@pytest.mark.parametrize(
|
||||||
assert split_lines('asd\r\n', keepends=True) == ['asd\r\n', '']
|
('string', 'expected_result', 'keepends'), [
|
||||||
assert split_lines('asd\r\n\f', keepends=True) == ['asd\r\n', '\f']
|
('asd\r\n', ['asd', ''], False),
|
||||||
assert split_lines('\fasd\r\n', keepends=True) == ['\fasd\r\n', '']
|
('asd\r\n', ['asd\r\n', ''], True),
|
||||||
assert split_lines('', keepends=True) == ['']
|
('asd\r', ['asd', ''], False),
|
||||||
assert split_lines('\n', keepends=True) == ['\n', '']
|
('asd\r', ['asd\r', ''], True),
|
||||||
|
('asd\n', ['asd', ''], False),
|
||||||
|
('asd\n', ['asd\n', ''], True),
|
||||||
|
|
||||||
|
('asd\r\n\f', ['asd', '\f'], False),
|
||||||
|
('asd\r\n\f', ['asd\r\n', '\f'], True),
|
||||||
|
|
||||||
|
('\fasd\r\n', ['\fasd', ''], False),
|
||||||
|
('\fasd\r\n', ['\fasd\r\n', ''], True),
|
||||||
|
|
||||||
|
('', [''], False),
|
||||||
|
('', [''], True),
|
||||||
|
|
||||||
|
('\n', ['', ''], False),
|
||||||
|
('\n', ['\n', ''], True),
|
||||||
|
|
||||||
|
('\r', ['', ''], False),
|
||||||
|
('\r', ['\r', ''], True),
|
||||||
|
|
||||||
|
# Invalid line breaks
|
||||||
|
('a\vb', ['a\vb'], False),
|
||||||
|
('a\vb', ['a\vb'], True),
|
||||||
|
('\x1C', ['\x1C'], False),
|
||||||
|
('\x1C', ['\x1C'], True),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
def test_split_lines(string, expected_result, keepends):
|
||||||
|
assert split_lines(string, keepends=keepends) == expected_result
|
||||||
|
|
||||||
|
|
||||||
def test_python_bytes_to_unicode_unicode_text():
|
def test_python_bytes_to_unicode_unicode_text():
|
||||||
|
|||||||
9
tox.ini
9
tox.ini
@@ -1,10 +1,10 @@
|
|||||||
[tox]
|
[tox]
|
||||||
envlist = py26, py27, py33, py34, py35, py36
|
envlist = py27, py33, py34, py35, py36, py37, pypy
|
||||||
[testenv]
|
[testenv]
|
||||||
|
extras = testing
|
||||||
deps =
|
deps =
|
||||||
pytest>=3.0.7
|
py26,py33: pytest>=3.0.7,<3.3
|
||||||
# For --lf and --ff.
|
py26,py33: setuptools<37
|
||||||
pytest-cache
|
|
||||||
setenv =
|
setenv =
|
||||||
# https://github.com/tomchristie/django-rest-framework/issues/1957
|
# https://github.com/tomchristie/django-rest-framework/issues/1957
|
||||||
# tox corrupts __pycache__, solution from here:
|
# tox corrupts __pycache__, solution from here:
|
||||||
@@ -14,7 +14,6 @@ commands =
|
|||||||
[testenv:cov]
|
[testenv:cov]
|
||||||
deps =
|
deps =
|
||||||
coverage
|
coverage
|
||||||
{[testenv]deps}
|
|
||||||
commands =
|
commands =
|
||||||
coverage run --source parso -m pytest
|
coverage run --source parso -m pytest
|
||||||
coverage report
|
coverage report
|
||||||
|
|||||||
Reference in New Issue
Block a user