Mirror of https://github.com/davidhalter/parso.git (synced 2025-12-08 21:54:54 +08:00)

Compare commits: 83 commits
579146b501, deb4dbce1c, 8eda8decea, f6935935c0, d3fa7e1cad, 83d9abd036, 222e9117b4,
eda2207e6c, a91e5f2775, cba4f2ccc1, 8f1a436ba1, 9941348ec6, afb71dc762, 0d96b12566,
9d2ce4bcd4, a3e280c2b9, 7c7f4f4e54, 56b3e2cdc8, 97f042c6ba, b1aa7c6a79, 235fda3fbb,
d8d2e596a5, e05ce5ae31, 25e4ea9c24, 9f88fe16a3, ba0e7a2e9d, dc80152ff8, 9e3154d167,
065da34272, f89809de9a, 332c57ebcb, acb173b703, 47e78b37fe, fc44af6165, 73439d5863,
085aad3038, 7db500bfbc, e689f3dce6, b076cdc12a, 0dea94c801, 6cf487aee2, 2ca629a2f6,
5c1e953c17, a5f565ae10, a29ec25598, 04360cdfe7, 4824534f8a, 647073b1b9, 50445f424e,
5b5d855fab, 04e18ebb01, d3cfcc24b8, 89646e0970, bc8566e964, 89932c368d, dcae8cda92,
1f6683b8ac, 0ec02e1d7f, 8db1498185, 26e882d19c, 3a506b44ac, bae36f8ab0, 94268815e8,
5a57e8df06, aa82a1d39a, e0f74dd8ad, 7df254440e, 05a8236d3f, 2067845cef, dcdd3bbc8e,
82868580a2, a18baf0d2c, ed803f5749, 032c7563c4, b83c641057, 97d9aeafb7, 7a277c7302,
5993765e0a, 435d310c2b, 8aa280342a, 73c61bca4a, 60ed141d80, 091e72562c
.gitignore (vendored): 1 line changed
@@ -9,3 +9,4 @@
/dist/
parso.egg-info/
/.cache/
/.pytest_cache
@@ -7,6 +7,7 @@ python:
- 3.4
- 3.5
- 3.6
- 3.7-dev
- pypy
matrix:
allow_failures:
@@ -5,6 +5,7 @@ David Halter (@davidhalter) <davidhalter88@gmail.com>

Code Contributors
=================
Alisdair Robertson (@robodair)


Code Contributors (to Jedi and therefore possibly to this library)
@@ -3,8 +3,19 @@
Changelog
---------

0.2.0 (2018-04-15)
+++++++++++++++++++

0.1.0 (2017-05-30)
- f-strings are now parsed as a part of the normal Python grammar. This makes
  it way easier to deal with them.

0.1.1 (2017-11-05)
+++++++++++++++++++

- Fixed a few bugs in the caching layer
- Added support for Python 3.7

0.1.0 (2017-09-04)
+++++++++++++++++++

- Pulling the library out of Jedi. Some APIs will definitely change.
README.rst: 67 lines changed
@@ -1,5 +1,5 @@
###################################################################
parso - A Python Parser Written in Python
parso - A Python Parser
###################################################################

.. image:: https://secure.travis-ci.org/davidhalter/parso.png?branch=master
@@ -10,19 +10,52 @@ parso - A Python Parser Written in Python
    :target: https://coveralls.io/r/davidhalter/parso
    :alt: Coverage Status

.. image:: https://raw.githubusercontent.com/davidhalter/parso/master/docs/_static/logo_characters.png

Parso is a Python parser that supports error recovery and round-trip parsing.
Parso is a Python parser that supports error recovery and round-trip parsing
for different Python versions (in multiple Python versions). Parso is also able
to list multiple syntax errors in your python file.

Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful
for other projects as well.

Parso is very simplistic. It consists of a small API to parse Python and
analyse the parsing tree.
Parso consists of a small API to parse Python and analyse the syntax tree.

A simple example:

Ressources
==========
.. code-block:: python

    >>> import parso
    >>> module = parso.parse('hello + 1', version="3.6")
    >>> expr = module.children[0]
    >>> expr
    PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>])
    >>> print(expr.get_code())
    hello + 1
    >>> name = expr.children[0]
    >>> name
    <Name: hello@1,0>
    >>> name.end_pos
    (1, 5)
    >>> expr.end_pos
    (1, 9)

To list multiple issues:

.. code-block:: python

    >>> grammar = parso.load_grammar()
    >>> module = grammar.parse('foo +\nbar\ncontinue')
    >>> error1, error2 = grammar.iter_errors(module)
    >>> error1.message
    'SyntaxError: invalid syntax'
    >>> error2.message
    "SyntaxError: 'continue' not properly in loop"

Resources
=========

- `Testing <http://parso.readthedocs.io/en/latest/docs/development.html#testing>`_
- `PyPI <https://pypi.python.org/pypi/parso>`_
- `Docs <https://parso.readthedocs.org/en/latest/>`_
- Uses `semantic versioning <http://semver.org/>`_
@@ -42,32 +75,16 @@ Known Issues
============

- `async`/`await` are already used as keywords in Python3.6.
- `from __future__ import print_function` is not supported,
- `from __future__ import print_function` is not ignored.

Testing
=======

The test suite depends on ``tox`` and ``pytest``::

    pip install tox pytest

To run the tests for all supported Python versions::

    tox

If you want to test only a specific Python version (e.g. Python 2.7), it's as
easy as ::

    tox -e py27

Tests are also run automatically on `Travis CI
<https://travis-ci.org/davidhalter/parso/>`_.

Acknowledgements
================

- Guido van Rossum (@gvanrossum) for creating the parser generator pgen2
  (originally used in lib2to3).
- `Salome Schneider <https://www.crepes-schnaegg.ch/cr%C3%AApes-schn%C3%A4gg/kunst-f%C3%BCrs-cr%C3%AApes-mobil/>`_
  for the extremely awesome parso logo.


.. _jedi: https://github.com/davidhalter/jedi
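A minimal sketch of the error-recovery and round-trip claims above (behaviour as described in the README; node types printed at the end are illustrative):

    import parso

    # Even broken code comes back as a tree instead of raising.
    source = 'def f(x):\n    return x +\n'
    module = parso.parse(source, version="3.6")

    # Round-trip parsing: get_code() reproduces the original source exactly.
    assert module.get_code() == source

    # The invalid part is kept in the tree (as an error node) rather than lost.
    print([child.type for child in module.children])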
conftest.py: 30 lines changed
@@ -11,9 +11,12 @@ import parso
|
||||
from parso import cache
|
||||
from parso.utils import parse_version_string
|
||||
|
||||
|
||||
collect_ignore = ["setup.py"]
|
||||
|
||||
VERSIONS_2 = '2.6', '2.7'
|
||||
VERSIONS_3 = '3.3', '3.4', '3.5', '3.6', '3.7'
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
def clean_parso_cache():
|
||||
"""
|
||||
@@ -49,20 +52,11 @@ def pytest_generate_tests(metafunc):
|
||||
ids=[c.name for c in cases]
|
||||
)
|
||||
elif 'each_version' in metafunc.fixturenames:
|
||||
metafunc.parametrize(
|
||||
'each_version',
|
||||
['2.6', '2.7', '3.3', '3.4', '3.5', '3.6'],
|
||||
)
|
||||
metafunc.parametrize('each_version', VERSIONS_2 + VERSIONS_3)
|
||||
elif 'each_py2_version' in metafunc.fixturenames:
|
||||
metafunc.parametrize(
|
||||
'each_py2_version',
|
||||
['2.6', '2.7'],
|
||||
)
|
||||
metafunc.parametrize('each_py2_version', VERSIONS_2)
|
||||
elif 'each_py3_version' in metafunc.fixturenames:
|
||||
metafunc.parametrize(
|
||||
'each_py3_version',
|
||||
['3.3', '3.4', '3.5', '3.6'],
|
||||
)
|
||||
metafunc.parametrize('each_py3_version', VERSIONS_3)
|
||||
|
||||
|
||||
class NormalizerIssueCase(object):
|
||||
@@ -97,16 +91,6 @@ def pytest_configure(config):
|
||||
root.addHandler(ch)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def each_py3_version():
|
||||
return '3.3', '3.4', '3.5', '3.6'
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def each_py2_version():
|
||||
return '2.6', '2.7'
|
||||
|
||||
|
||||
class Checker():
|
||||
def __init__(self, version, is_passing):
|
||||
self.version = version
|
||||
|
||||
@@ -36,7 +36,7 @@ if [[ $tag_ref ]]; then
exit 1
fi
else
git tag $tag
git tag -a $tag
git push --tags
fi

@@ -48,5 +48,5 @@ python setup.py sdist bdist_wheel
twine upload dist/*

cd $BASE_DIR
# Back in the development directory fetch tags.
git fetch --tags
# The tags have been pushed to this repo. Push the tags to github, now.
git push --tags
docs/_static/logo.png (vendored, binary file not shown): 28 KiB before, 200 KiB after
docs/_static/logo_characters.png (vendored, binary file not shown): new file, 55 KiB
5
docs/_themes/flask/layout.html
vendored
5
docs/_themes/flask/layout.html
vendored
@@ -6,8 +6,8 @@
|
||||
{% endif %}
|
||||
<link media="only screen and (max-device-width: 480px)" href="{{
|
||||
pathto('_static/small_flask.css', 1) }}" type= "text/css" rel="stylesheet" />
|
||||
<a href="https://github.com/davidhalter/jedi">
|
||||
<img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png" alt="Fork me on GitHub">
|
||||
<a href="https://github.com/davidhalter/parso">
|
||||
<img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png" alt="Fork me">
|
||||
</a>
|
||||
{% endblock %}
|
||||
{%- block relbar2 %}{% endblock %}
|
||||
@@ -19,7 +19,6 @@
|
||||
{% endblock %}
|
||||
{%- block footer %}
|
||||
<div class="footer">
|
||||
© Copyright {{ copyright }}.
|
||||
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a>.
|
||||
</div>
|
||||
{% if pagename == 'index' %}
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
|
||||
import sys
|
||||
import os
|
||||
import datetime
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
@@ -45,7 +44,7 @@ master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = u'parso'
|
||||
copyright = u'2012 - {today.year}, parso contributors'.format(today=datetime.date.today())
|
||||
copyright = u'parso contributors'
|
||||
|
||||
import parso
|
||||
from parso.utils import version_info
|
||||
@@ -274,7 +273,7 @@ autodoc_default_flags = []
|
||||
# -- Options for intersphinx module --------------------------------------------
|
||||
|
||||
intersphinx_mapping = {
|
||||
'http://docs.python.org/': None,
|
||||
'http://docs.python.org/': ('https://docs.python.org/3.6', None),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,8 @@ Development
===========

If you want to contribute anything to |parso|, just open an issue or pull
request to discuss it. We welcome changes!
request to discuss it. We welcome changes! Please check the ``CONTRIBUTING.md``
file in the repository, first.


Deprecations Process
@@ -3,8 +3,8 @@
Installation and Configuration
==============================

The preferred way
-----------------
The preferred way (pip)
-----------------------

On any system you can install |parso| directly from the Python package index
using pip::
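After installing, a quick way to confirm the package is importable (the version string depends on what pip actually installed):

    import parso

    print(parso.__version__)   # e.g. '0.2.0' for the release this diff prepares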
@@ -1,29 +1,42 @@
.. include:: ../global.rst

.. _parser-tree:

Parser Tree
===========

Usage
-----
The parser tree is returned by calling :py:meth:`parso.Grammar.parse`.

.. automodule:: parso.python
.. note:: Note that parso positions are always 1 based for lines and zero
   based for columns. This means the first position in a file is (1, 0).

Parser Tree Base Classes
------------------------

Generally there are two types of classes you will deal with:
:py:class:`parso.tree.Leaf` and :py:class:`parso.tree.BaseNode`.

.. autoclass:: parso.tree.BaseNode
    :show-inheritance:
    :members:
    :undoc-members:


Parser Tree Base Class
----------------------
.. autoclass:: parso.tree.Leaf
    :show-inheritance:
    :members:

All nodes and leaves have these methods/properties:

.. autoclass:: parso.tree.NodeOrLeaf
    :members:
    :undoc-members:
    :show-inheritance:


Python Parser Tree
------------------

.. currentmodule:: parso.python.tree

.. automodule:: parso.python.tree
    :members:
    :undoc-members:
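A small sketch of walking the tree with the ``NodeOrLeaf`` API documented above (method names as exposed by parso.tree; the exact child node types depend on the grammar):

    import parso

    module = parso.parse('x = 1\n', version="3.6")

    # Every node and leaf knows its position; lines are 1-based, columns 0-based.
    first = module.get_first_leaf()
    last = module.get_last_leaf()
    print(first.start_pos, last.end_pos)

    # Walk downwards through children; each node can reproduce its own source.
    for child in module.children:
        print(child.type, repr(child.get_code()))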
@@ -4,12 +4,13 @@ Usage
=====

|parso| works around grammars. You can simply create Python grammars by calling
``load_grammar``. Grammars (with a custom tokenizer and custom parser trees)
can also be created by directly instantiating ``Grammar``. More information
:py:func:`parso.load_grammar`. Grammars (with a custom tokenizer and custom parser trees)
can also be created by directly instantiating :py:func:`parso.Grammar`. More information
about the resulting objects can be found in the :ref:`parser tree documentation
<parser-tree>`.

The simplest way of using parso is without even loading a grammar:
The simplest way of using parso is without even loading a grammar
(:py:func:`parso.parse`):

.. sourcecode:: python

@@ -17,7 +18,31 @@ The simplest way of using parso is without even loading a grammar:
    >>> parso.parse('foo + bar')
    <Module: @1-1>

.. automodule:: parso.grammar
Loading a Grammar
-----------------

Typically if you want to work with one specific Python version, use:

.. autofunction:: parso.load_grammar

Grammar methods
---------------

You will get back a grammar object that you can use to parse code and find
issues in it:

.. autoclass:: parso.Grammar
    :members:
    :undoc-members:


Error Retrieval
---------------

|parso| is able to find multiple errors in your source code. Iterating through
those errors yields the following instances:

.. autoclass:: parso.normalizer.Issue
    :members:
    :undoc-members:

@@ -25,17 +50,17 @@ The simplest way of using parso is without even loading a grammar:
Utility
-------

.. autofunction:: parso.parse
|parso| also offers some utility functions that can be really useful:

.. automodule:: parso.utils
    :members:
    :undoc-members:
.. autofunction:: parso.parse
.. autofunction:: parso.split_lines
.. autofunction:: parso.python_bytes_to_unicode


Used By
-------

- jedi_ (which is used by IPython and a lot of plugins).
- jedi_ (which is used by IPython and a lot of editor plugins).


.. _jedi: https://github.com/davidhalter/jedi
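A sketch of the error-retrieval workflow described above (attribute names follow the ``Issue`` class; the exact messages depend on the grammar version):

    import parso

    grammar = parso.load_grammar(version="3.6")
    module = grammar.parse('foo +\nbar\ncontinue')

    for issue in grammar.iter_errors(module):
        line, column = issue.start_pos    # 1-based line, 0-based column
        print(line, column, issue.code, issue.message)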
@@ -1,7 +1,7 @@
.. include global.rst

parso - A Python Parser Written in Python
=========================================
parso - A Python Parser
=======================

Release v\ |release|. (:doc:`Installation <docs/installation>`)
@@ -1,10 +1,19 @@
"""
parso is a Python parser. It's really easy to use and supports multiple Python
versions, file caching, round-trips and other stuff:
r"""
Parso is a Python parser that supports error recovery and round-trip parsing
for different Python versions (in multiple Python versions). Parso is also able
to list multiple syntax errors in your python file.

>>> from parso import load_grammar
>>> grammar = load_grammar(version='2.7')
>>> module = grammar.parse('hello + 1')
Parso has been battle-tested by jedi_. It was pulled out of jedi to be useful
for other projects as well.

Parso consists of a small API to parse Python and analyse the syntax tree.

.. _jedi: https://github.com/davidhalter/jedi

A simple example:

>>> import parso
>>> module = parso.parse('hello + 1', version="3.6")
>>> expr = module.children[0]
>>> expr
PythonNode(arith_expr, [<Name: hello@1,0>, <Operator: +>, <Number: 1>])
@@ -17,19 +26,32 @@ hello + 1
(1, 5)
>>> expr.end_pos
(1, 9)

To list multiple issues:

>>> grammar = parso.load_grammar()
>>> module = grammar.parse('foo +\nbar\ncontinue')
>>> error1, error2 = grammar.iter_errors(module)
>>> error1.message
'SyntaxError: invalid syntax'
>>> error2.message
"SyntaxError: 'continue' not properly in loop"
"""

from parso.parser import ParserSyntaxError
from parso.grammar import Grammar, load_grammar
from parso.utils import split_lines, python_bytes_to_unicode


__version__ = '0.0.4'
__version__ = '0.2.0'


def parse(code=None, **kwargs):
    """
    A utility function to parse Python with the current Python version. Params
    are documented in ``Grammar.parse``.
    A utility function to avoid loading grammars.
    Params are documented in :py:meth:`parso.Grammar.parse`.

    :param str version: The version used by :py:func:`parso.load_grammar`.
    """
    version = kwargs.pop('version', None)
    grammar = load_grammar(version=version)
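The imports above re-export two small helpers next to ``parse``; a brief sketch of how they are typically used (signatures assumed from parso.utils):

    import parso

    # split_lines keeps the line structure parso itself works with.
    lines = parso.split_lines('foo\nbar\r\nbaz')
    print(lines)    # ['foo', 'bar', 'baz']

    # python_bytes_to_unicode decodes source bytes, honouring a coding declaration.
    source = parso.python_bytes_to_unicode(b'# -*- coding: utf-8 -*-\nx = 1\n')
    module = parso.parse(source)
    print(module.get_code() == source)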
@@ -36,7 +36,7 @@ except AttributeError:
|
||||
def u(string):
|
||||
"""Cast to unicode DAMMIT!
|
||||
Written because Python2 repr always implicitly casts to a string, so we
|
||||
have to cast back to a unicode (and we now that we always deal with valid
|
||||
have to cast back to a unicode (and we know that we always deal with valid
|
||||
unicode, because we check that in the beginning).
|
||||
"""
|
||||
if py_version >= 30:
|
||||
|
||||
@@ -4,13 +4,19 @@ import sys
|
||||
import hashlib
|
||||
import gc
|
||||
import shutil
|
||||
import pickle
|
||||
import platform
|
||||
import errno
|
||||
import logging
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except:
|
||||
import pickle
|
||||
|
||||
from parso._compatibility import FileNotFoundError
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_PICKLE_VERSION = 30
|
||||
"""
|
||||
@@ -111,7 +117,7 @@ def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
|
||||
return None
|
||||
else:
|
||||
parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
|
||||
logging.debug('pickle loaded: %s', path)
|
||||
LOG.debug('pickle loaded: %s', path)
|
||||
return module_cache_item.node
|
||||
|
||||
|
||||
@@ -125,7 +131,7 @@ def save_module(hashed_grammar, path, module, lines, pickling=True, cache_path=N
|
||||
item = _NodeCacheItem(module, lines, p_time)
|
||||
parser_cache.setdefault(hashed_grammar, {})[path] = item
|
||||
if pickling and path is not None:
|
||||
_save_to_file_system(hashed_grammar, path, item)
|
||||
_save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
|
||||
|
||||
|
||||
def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
|
||||
|
||||
@@ -12,17 +12,17 @@ from parso.parser import BaseParser
from parso.python.parser import Parser as PythonParser
from parso.python.errors import ErrorFinderConfig
from parso.python import pep8
from parso.python import fstring

_loaded_grammars = {}


class Grammar(object):
    """
    Create custom grammars by calling this. It's not really supported, yet.
    :py:func:`parso.load_grammar` returns instances of this class.

    :param text: A BNF representation of your grammar.
    Creating custom grammars by calling this is not supported, yet.
    """
    #:param text: A BNF representation of your grammar.
    _error_normalizer_config = None
    _token_namespace = None
    _default_normalizer_config = pep8.PEP8NormalizerConfig()
@@ -44,33 +44,38 @@ class Grammar(object):
        If you need finer grained control over the parsed instance, there will be
        other ways to access it.

        :param code str: A unicode string that contains Python code.
        :param path str: The path to the file you want to open. Only needed for caching.
        :param error_recovery bool: If enabled, any code will be returned. If
        :param str code: A unicode or bytes string. When it's not possible to
            decode bytes to a string, returns a
            :py:class:`UnicodeDecodeError`.
        :param bool error_recovery: If enabled, any code will be returned. If
            it is invalid, it will be returned as an error node. If disabled,
            you will get a ParseError when encountering syntax errors in your
            code.
        :param start_symbol str: The grammar symbol that you want to parse. Only
        :param str start_symbol: The grammar symbol that you want to parse. Only
            allowed to be used when error_recovery is False.
        :param cache bool: Keeps a copy of the parser tree in RAM and on disk
        :param str path: The path to the file you want to open. Only needed for caching.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
            files on disk have not changed.
        :param diff_cache bool: Diffs the cached python module against the new
        :param bool diff_cache: Diffs the cached python module against the new
            code and tries to parse only the parts that have changed. Returns
            the same (changed) module that is found in cache. Using this option
            requires you to not do anything anymore with the old cached module,
            because the contents of it might have changed.
        :param cache_path bool: If given saves the parso cache in this
            requires you to not do anything anymore with the cached modules
            under that path, because the contents of it might change. This
            option is still somewhat experimental. If you want stability,
            please don't use it.
        :param bool cache_path: If given saves the parso cache in this
            directory. If not given, defaults to the default cache places on
            each platform.

        :return: A syntax tree node. Typically the module.
        :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a
            :py:class:`parso.python.tree.Module`.
        """
        if 'start_pos' in kwargs:
            raise TypeError("parse() got an unexpected keyworda argument.")
            raise TypeError("parse() got an unexpected keyword argument.")
        return self._parse(code=code, **kwargs)

    def _parse(self, code=None, path=None, error_recovery=True,
    def _parse(self, code=None, error_recovery=True, path=None,
               start_symbol=None, cache=False, diff_cache=False,
               cache_path=None, start_pos=(1, 0)):
        """
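A sketch exercising the keyword arguments documented above (the file path is hypothetical; with ``error_recovery=False`` parso raises its ``ParserSyntaxError`` on invalid input):

    import parso

    grammar = parso.load_grammar(version="3.6")

    # Error recovery on (the default): invalid code still yields a tree.
    module = grammar.parse('def broken(:\n    pass\n')

    # Error recovery off: syntax errors raise instead.
    try:
        grammar.parse('def broken(:\n    pass\n', error_recovery=False)
    except parso.ParserSyntaxError as exc:
        print(exc.error_leaf.start_pos)

    # Caching against a (hypothetical) file on disk; repeated parses reuse the cache.
    # module = grammar.parse(path='example.py', cache=True)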
if error_recovery and start_symbol != 'file_input':
|
||||
raise NotImplementedError("This is currently not implemented.")
|
||||
|
||||
if cache and code is None and path is not None:
|
||||
# With the current architecture we cannot load from cache if the
|
||||
# code is given, because we just load from cache if it's not older than
|
||||
# the latest change (file last modified).
|
||||
if cache and path is not None:
|
||||
module_node = load_module(self._hashed, path, cache_path=cache_path)
|
||||
if module_node is not None:
|
||||
return module_node
|
||||
@@ -152,6 +154,11 @@ class Grammar(object):
        return ns

    def iter_errors(self, node):
        """
        Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of
        :py:class:`parso.normalizer.Issue` objects. For Python this is
        a list of syntax/indentation errors.
        """
        if self._error_normalizer_config is None:
            raise ValueError("No error normalizer specified for this grammar.")
@@ -178,7 +185,6 @@ class Grammar(object):
|
||||
normalizer.walk(node)
|
||||
return normalizer.issues
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
labels = self._pgen_grammar.number2symbol.values()
|
||||
txt = ' '.join(list(labels)[:3]) + ' ...'
|
||||
@@ -207,46 +213,21 @@ class PythonGrammar(Grammar):
|
||||
return tokenize(code, self.version_info)
|
||||
|
||||
|
||||
class PythonFStringGrammar(Grammar):
|
||||
_token_namespace = fstring.TokenNamespace
|
||||
_start_symbol = 'fstring'
|
||||
|
||||
def __init__(self):
|
||||
super(PythonFStringGrammar, self).__init__(
|
||||
text=fstring.GRAMMAR,
|
||||
tokenizer=fstring.tokenize,
|
||||
parser=fstring.Parser
|
||||
)
|
||||
|
||||
def parse(self, code, **kwargs):
|
||||
return self._parse(code, **kwargs)
|
||||
|
||||
def _parse(self, code, error_recovery=True, start_pos=(1, 0)):
|
||||
tokens = self._tokenizer(code, start_pos=start_pos)
|
||||
p = self._parser(
|
||||
self._pgen_grammar,
|
||||
error_recovery=error_recovery,
|
||||
start_symbol=self._start_symbol,
|
||||
)
|
||||
return p.parse(tokens=tokens)
|
||||
|
||||
def parse_leaf(self, leaf, error_recovery=True):
|
||||
code = leaf._get_payload()
|
||||
return self.parse(code, error_recovery=True, start_pos=leaf.start_pos)
|
||||
|
||||
|
||||
def load_grammar(**kwargs):
    """
    Loads a Python grammar. The default version is the current Python version.
    Loads a :py:class:`parso.Grammar`. The default version is the current Python
    version.

    If you need support for a specific version, please use e.g.
    `version='3.3'`.
    :param str version: A python version string, e.g. ``version='3.3'``.
    """
def load_grammar(language='python', version=None):
    if language == 'python':
        version_info = parse_version_string(version)

        file = 'python/grammar%s%s.txt' % (version_info.major, version_info.minor)
        file = os.path.join(
            'python',
            'grammar%s%s.txt' % (version_info.major, version_info.minor)
        )

        global _loaded_grammars
        path = os.path.join(os.path.dirname(__file__), file)
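A small sketch of the version handling above (grammars are looked up by their grammar file and kept in ``_loaded_grammars``, so repeated loads reuse the same instance; unsupported versions end in the ``NotImplementedError`` raised a few lines further down):

    import parso

    # The default grammar matches the running Python version.
    default = parso.load_grammar()

    # Loading the same version twice returns the cached grammar object.
    assert parso.load_grammar(version='3.6') is parso.load_grammar(version='3.6')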
@@ -262,10 +243,6 @@ def load_grammar(**kwargs):
|
||||
except FileNotFoundError:
|
||||
message = "Python version %s is currently not supported." % version
|
||||
raise NotImplementedError(message)
|
||||
elif language == 'python-f-string':
|
||||
if version is not None:
|
||||
raise NotImplementedError("Currently different versions are not supported.")
|
||||
return PythonFStringGrammar()
|
||||
else:
|
||||
raise NotImplementedError("No support for language %s." % language)
|
||||
|
||||
|
||||
@@ -121,8 +121,18 @@ class Issue(object):
    def __init__(self, node, code, message):
        self._node = node
        self.code = code
        """
        An integer code that stands for the type of error.
        """
        self.message = message
        """
        A message (string) for the issue.
        """
        self.start_pos = node.start_pos
        """
        The start position position of the error as a tuple (line, column). As
        always in |parso| the first line is 1 and the first column 0.
        """

    def __eq__(self, other):
        return self.start_pos == other.start_pos and self.code == other.code
@@ -16,7 +16,10 @@ fallback token code OP, but the parser needs the actual token code.
|
||||
|
||||
"""
|
||||
|
||||
import pickle
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except:
|
||||
import pickle
|
||||
|
||||
|
||||
class Grammar(object):
|
||||
|
||||
@@ -28,6 +28,7 @@ class ParserGenerator(object):
|
||||
c = grammar.Grammar(self._bnf_text)
|
||||
names = list(self.dfas.keys())
|
||||
names.sort()
|
||||
# TODO do we still need this?
|
||||
names.remove(self.startsymbol)
|
||||
names.insert(0, self.startsymbol)
|
||||
for name in names:
|
||||
@@ -316,8 +317,8 @@ class ParserGenerator(object):
|
||||
|
||||
def _expect(self, type):
|
||||
if self.type != type:
|
||||
self._raise_error("expected %s, got %s(%s)",
|
||||
type, self.type, self.value)
|
||||
self._raise_error("expected %s(%s), got %s(%s)",
|
||||
type, token.tok_name[type], self.type, self.value)
|
||||
value = self.value
|
||||
self._gettoken()
|
||||
return value
|
||||
|
||||
@@ -16,6 +16,8 @@ from parso.python.tree import EndMarker
|
||||
from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT,
|
||||
ENDMARKER, INDENT, DEDENT)
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_last_line(node_or_leaf):
|
||||
last_leaf = node_or_leaf.get_last_leaf()
|
||||
@@ -116,7 +118,7 @@ class DiffParser(object):
|
||||
|
||||
Returns the new module node.
|
||||
'''
|
||||
logging.debug('diff parser start')
|
||||
LOG.debug('diff parser start')
|
||||
# Reset the used names cache so they get regenerated.
|
||||
self._module._used_names = None
|
||||
|
||||
@@ -127,11 +129,11 @@ class DiffParser(object):
|
||||
line_length = len(new_lines)
|
||||
sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
|
||||
opcodes = sm.get_opcodes()
|
||||
logging.debug('diff parser calculated')
|
||||
logging.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
|
||||
LOG.debug('diff parser calculated')
|
||||
LOG.debug('diff: line_lengths old: %s, new: %s' % (len(old_lines), line_length))
|
||||
|
||||
for operation, i1, i2, j1, j2 in opcodes:
|
||||
logging.debug('diff %s old[%s:%s] new[%s:%s]',
|
||||
LOG.debug('diff code[%s] old[%s:%s] new[%s:%s]',
|
||||
operation, i1 + 1, i2, j1 + 1, j2)
|
||||
|
||||
if j2 == line_length and new_lines[-1] == '':
|
||||
@@ -161,12 +163,12 @@ class DiffParser(object):
|
||||
% (last_pos, line_length, ''.join(diff))
|
||||
)
|
||||
|
||||
logging.debug('diff parser end')
|
||||
LOG.debug('diff parser end')
|
||||
return self._module
|
||||
|
||||
def _enabled_debugging(self, old_lines, lines_new):
|
||||
if self._module.get_code() != ''.join(lines_new):
|
||||
logging.warning('parser issue:\n%s\n%s', ''.join(old_lines),
|
||||
LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
|
||||
''.join(lines_new))
|
||||
|
||||
def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
|
||||
@@ -203,7 +205,7 @@ class DiffParser(object):
|
||||
from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
|
||||
to = self._nodes_stack.parsed_until_line
|
||||
|
||||
logging.debug('diff actually copy %s to %s', from_, to)
|
||||
LOG.debug('diff actually copy %s to %s', from_, to)
|
||||
# Since there are potential bugs that might loop here endlessly, we
|
||||
# just stop here.
|
||||
assert last_until_line != self._nodes_stack.parsed_until_line \
|
||||
@@ -248,7 +250,7 @@ class DiffParser(object):
|
||||
nodes = node.children
|
||||
|
||||
self._nodes_stack.add_parsed_nodes(nodes)
|
||||
logging.debug(
|
||||
LOG.debug(
|
||||
'parse_part from %s to %s (to %s in part parser)',
|
||||
nodes[0].get_start_pos_of_prefix()[0],
|
||||
self._nodes_stack.parsed_until_line,
|
||||
@@ -452,7 +454,7 @@ class _NodesStack(object):
|
||||
self._last_prefix = ''
|
||||
if is_endmarker:
|
||||
try:
|
||||
separation = last_leaf.prefix.rindex('\n')
|
||||
separation = last_leaf.prefix.rindex('\n') + 1
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
@@ -460,7 +462,7 @@ class _NodesStack(object):
|
||||
# That is not relevant if parentheses were opened. Always parse
|
||||
# until the end of a line.
|
||||
last_leaf.prefix, self._last_prefix = \
|
||||
last_leaf.prefix[:separation + 1], last_leaf.prefix[separation + 1:]
|
||||
last_leaf.prefix[:separation], last_leaf.prefix[separation:]
|
||||
|
||||
first_leaf = tree_nodes[0].get_first_leaf()
|
||||
first_leaf.prefix = self.prefix + first_leaf.prefix
|
||||
@@ -470,7 +472,6 @@ class _NodesStack(object):
|
||||
self.prefix = last_leaf.prefix
|
||||
|
||||
tree_nodes = tree_nodes[:-1]
|
||||
|
||||
return tree_nodes
|
||||
|
||||
def copy_nodes(self, tree_nodes, until_line, line_offset):
|
||||
@@ -490,6 +491,13 @@ class _NodesStack(object):
|
||||
new_tos = tos
|
||||
for node in nodes:
|
||||
if node.type == 'endmarker':
|
||||
# We basically removed the endmarker, but we are not allowed to
|
||||
# remove the newline at the end of the line, otherwise it's
|
||||
# going to be missing.
|
||||
try:
|
||||
self.prefix = node.prefix[:node.prefix.rindex('\n') + 1]
|
||||
except ValueError:
|
||||
pass
|
||||
# Endmarkers just distort all the checks below. Remove them.
|
||||
break
|
||||
|
||||
|
||||
@@ -563,7 +563,8 @@ class _ReturnAndYieldChecks(SyntaxRule):
|
||||
and self._normalizer.version == (3, 5):
|
||||
self.add_issue(self.get_node(leaf), message=self.message_async_yield)
|
||||
|
||||
@ErrorFinder.register_rule(type='atom')
|
||||
|
||||
@ErrorFinder.register_rule(type='strings')
|
||||
class _BytesAndStringMix(SyntaxRule):
|
||||
# e.g. 's' b''
|
||||
message = "cannot mix bytes and nonbytes literals"
|
||||
@@ -744,7 +745,12 @@ class _NonlocalModuleLevelRule(SyntaxRule):
|
||||
|
||||
@ErrorFinder.register_rule(type='arglist')
|
||||
class _ArglistRule(SyntaxRule):
|
||||
message = "Generator expression must be parenthesized if not sole argument"
|
||||
@property
|
||||
def message(self):
|
||||
if self._normalizer.version < (3, 7):
|
||||
return "Generator expression must be parenthesized if not sole argument"
|
||||
else:
|
||||
return "Generator expression must be parenthesized"
|
||||
|
||||
def is_issue(self, node):
|
||||
first_arg = node.children[0]
|
||||
@@ -837,101 +843,36 @@ class _TryStmtRule(SyntaxRule):
|
||||
self.add_issue(default_except, message=self.message)
|
||||
|
||||
|
||||
@ErrorFinder.register_rule(type='string')
|
||||
@ErrorFinder.register_rule(type='fstring')
|
||||
class _FStringRule(SyntaxRule):
|
||||
_fstring_grammar = None
|
||||
message_empty = "f-string: empty expression not allowed" # f'{}'
|
||||
message_single_closing = "f-string: single '}' is not allowed" # f'}'
|
||||
message_nested = "f-string: expressions nested too deeply"
|
||||
message_backslash = "f-string expression part cannot include a backslash" # f'{"\"}' or f'{"\\"}'
|
||||
message_comment = "f-string expression part cannot include '#'" # f'{#}'
|
||||
message_unterminated_string = "f-string: unterminated string" # f'{"}'
|
||||
message_conversion = "f-string: invalid conversion character: expected 's', 'r', or 'a'"
|
||||
message_incomplete = "f-string: expecting '}'" # f'{'
|
||||
message_syntax = "invalid syntax"
|
||||
|
||||
@classmethod
|
||||
def _load_grammar(cls):
|
||||
import parso
|
||||
def _check_format_spec(self, format_spec, depth):
|
||||
self._check_fstring_contents(format_spec.children[1:], depth)
|
||||
|
||||
if cls._fstring_grammar is None:
|
||||
cls._fstring_grammar = parso.load_grammar(language='python-f-string')
|
||||
return cls._fstring_grammar
|
||||
def _check_fstring_expr(self, fstring_expr, depth):
|
||||
if depth >= 2:
|
||||
self.add_issue(fstring_expr, message=self.message_nested)
|
||||
|
||||
conversion = fstring_expr.children[2]
|
||||
if conversion.type == 'fstring_conversion':
|
||||
name = conversion.children[1]
|
||||
if name.value not in ('s', 'r', 'a'):
|
||||
self.add_issue(name, message=self.message_conversion)
|
||||
|
||||
format_spec = fstring_expr.children[-2]
|
||||
if format_spec.type == 'fstring_format_spec':
|
||||
self._check_format_spec(format_spec, depth + 1)
|
||||
|
||||
def is_issue(self, fstring):
|
||||
if 'f' not in fstring.string_prefix.lower():
|
||||
return
|
||||
self._check_fstring_contents(fstring.children[1:-1])
|
||||
|
||||
parsed = self._load_grammar().parse_leaf(fstring)
|
||||
for child in parsed.children:
|
||||
if child.type == 'expression':
|
||||
self._check_expression(child)
|
||||
elif child.type == 'error_node':
|
||||
next_ = child.get_next_leaf()
|
||||
if next_.type == 'error_leaf' and next_.original_type == 'unterminated_string':
|
||||
self.add_issue(next_, message=self.message_unterminated_string)
|
||||
# At this point nothing more is comming except the error
|
||||
# leaf that we've already checked here.
|
||||
break
|
||||
self.add_issue(child, message=self.message_incomplete)
|
||||
elif child.type == 'error_leaf':
|
||||
self.add_issue(child, message=self.message_single_closing)
|
||||
|
||||
def _check_python_expr(self, python_expr):
|
||||
value = python_expr.value
|
||||
if '\\' in value:
|
||||
self.add_issue(python_expr, message=self.message_backslash)
|
||||
return
|
||||
if '#' in value:
|
||||
self.add_issue(python_expr, message=self.message_comment)
|
||||
return
|
||||
if re.match('\s*$', value) is not None:
|
||||
self.add_issue(python_expr, message=self.message_empty)
|
||||
return
|
||||
|
||||
# This is now nested parsing. We parsed the fstring and now
|
||||
# we're parsing Python again.
|
||||
try:
|
||||
# CPython has a bit of a special ways to parse Python code within
|
||||
# f-strings. It wraps the code in brackets to make sure that
|
||||
# whitespace doesn't make problems (indentation/newlines).
|
||||
# Just use that algorithm as well here and adapt start positions.
|
||||
start_pos = python_expr.start_pos
|
||||
start_pos = start_pos[0], start_pos[1] - 1
|
||||
eval_input = self._normalizer.grammar._parse(
|
||||
'(%s)' % value,
|
||||
start_symbol='eval_input',
|
||||
start_pos=start_pos,
|
||||
error_recovery=False
|
||||
)
|
||||
except ParserSyntaxError as e:
|
||||
self.add_issue(e.error_leaf, message=self.message_syntax)
|
||||
return
|
||||
|
||||
issues = self._normalizer.grammar.iter_errors(eval_input)
|
||||
self._normalizer.issues += issues
|
||||
|
||||
def _check_format_spec(self, format_spec):
|
||||
for expression in format_spec.children[1:]:
|
||||
nested_format_spec = expression.children[-2]
|
||||
if nested_format_spec.type == 'format_spec':
|
||||
if len(nested_format_spec.children) > 1:
|
||||
self.add_issue(
|
||||
nested_format_spec.children[1],
|
||||
message=self.message_nested
|
||||
)
|
||||
|
||||
self._check_expression(expression)
|
||||
|
||||
def _check_expression(self, expression):
|
||||
for c in expression.children:
|
||||
if c.type == 'python_expr':
|
||||
self._check_python_expr(c)
|
||||
elif c.type == 'conversion':
|
||||
if c.value not in ('s', 'r', 'a'):
|
||||
self.add_issue(c, message=self.message_conversion)
|
||||
elif c.type == 'format_spec':
|
||||
self._check_format_spec(c)
|
||||
def _check_fstring_contents(self, children, depth=0):
|
||||
for fstring_content in children:
|
||||
if fstring_content.type == 'fstring_expr':
|
||||
self._check_fstring_expr(fstring_content, depth)
|
||||
|
||||
|
||||
class _CheckAssignmentRule(SyntaxRule):
|
||||
@@ -944,7 +885,7 @@ class _CheckAssignmentRule(SyntaxRule):
|
||||
first, second = node.children[:2]
|
||||
error = _get_comprehension_type(node)
|
||||
if error is None:
|
||||
if second.type in ('dictorsetmaker', 'string'):
|
||||
if second.type == 'dictorsetmaker':
|
||||
error = 'literal'
|
||||
elif first in ('(', '['):
|
||||
if second.type == 'yield_expr':
|
||||
@@ -963,7 +904,7 @@ class _CheckAssignmentRule(SyntaxRule):
|
||||
error = 'Ellipsis'
|
||||
elif type_ == 'comparison':
|
||||
error = 'comparison'
|
||||
elif type_ in ('string', 'number'):
|
||||
elif type_ in ('string', 'number', 'strings'):
|
||||
error = 'literal'
|
||||
elif type_ == 'yield_expr':
|
||||
# This one seems to be a slightly different warning in Python.
|
||||
|
||||
@@ -1,211 +0,0 @@
|
||||
import re
|
||||
|
||||
from itertools import count
|
||||
from parso.utils import PythonVersionInfo
|
||||
from parso.utils import split_lines
|
||||
from parso.python.tokenize import Token
|
||||
from parso import parser
|
||||
from parso.tree import TypedLeaf, ErrorNode, ErrorLeaf
|
||||
|
||||
version36 = PythonVersionInfo(3, 6)
|
||||
|
||||
|
||||
class TokenNamespace:
|
||||
_c = count()
|
||||
LBRACE = next(_c)
|
||||
RBRACE = next(_c)
|
||||
ENDMARKER = next(_c)
|
||||
COLON = next(_c)
|
||||
CONVERSION = next(_c)
|
||||
PYTHON_EXPR = next(_c)
|
||||
EXCLAMATION_MARK = next(_c)
|
||||
UNTERMINATED_STRING = next(_c)
|
||||
|
||||
token_map = dict((v, k) for k, v in locals().items() if not k.startswith('_'))
|
||||
|
||||
@classmethod
|
||||
def generate_token_id(cls, string):
|
||||
if string == '{':
|
||||
return cls.LBRACE
|
||||
elif string == '}':
|
||||
return cls.RBRACE
|
||||
elif string == '!':
|
||||
return cls.EXCLAMATION_MARK
|
||||
elif string == ':':
|
||||
return cls.COLON
|
||||
return getattr(cls, string)
|
||||
|
||||
|
||||
GRAMMAR = """
|
||||
fstring: expression* ENDMARKER
|
||||
format_spec: ':' expression*
|
||||
expression: '{' PYTHON_EXPR [ '!' CONVERSION ] [ format_spec ] '}'
|
||||
"""
|
||||
|
||||
_prefix = r'((?:[^{}]+)*)'
|
||||
_expr = _prefix + r'(\{|\}|$)'
|
||||
_in_expr = r'([^{}\[\]:"\'!]*)(.?)'
|
||||
# There's only one conversion character allowed. But the rules have to be
|
||||
# checked later anyway, so allow more here. This makes error recovery nicer.
|
||||
_conversion = r'([^={}:]*)(.?)'
|
||||
|
||||
_compiled_expr = re.compile(_expr)
|
||||
_compiled_in_expr = re.compile(_in_expr)
|
||||
_compiled_conversion = re.compile(_conversion)
|
||||
|
||||
|
||||
def tokenize(code, start_pos=(1, 0)):
|
||||
def add_to_pos(string):
|
||||
lines = split_lines(string)
|
||||
l = len(lines[-1])
|
||||
if len(lines) > 1:
|
||||
start_pos[0] += len(lines) - 1
|
||||
start_pos[1] = l
|
||||
else:
|
||||
start_pos[1] += l
|
||||
|
||||
def tok(value, type=None, prefix=''):
|
||||
if type is None:
|
||||
type = TokenNamespace.generate_token_id(value)
|
||||
|
||||
add_to_pos(prefix)
|
||||
token = Token(type, value, tuple(start_pos), prefix)
|
||||
add_to_pos(value)
|
||||
return token
|
||||
|
||||
start = 0
|
||||
recursion_level = 0
|
||||
added_prefix = ''
|
||||
start_pos = list(start_pos)
|
||||
while True:
|
||||
match = _compiled_expr.match(code, start)
|
||||
prefix = added_prefix + match.group(1)
|
||||
found = match.group(2)
|
||||
start = match.end()
|
||||
if not found:
|
||||
# We're at the end.
|
||||
break
|
||||
|
||||
if found == '}':
|
||||
if recursion_level == 0 and len(code) > start and code[start] == '}':
|
||||
# This is a }} escape.
|
||||
added_prefix = prefix + '}}'
|
||||
start += 1
|
||||
continue
|
||||
|
||||
recursion_level = max(0, recursion_level - 1)
|
||||
yield tok(found, prefix=prefix)
|
||||
added_prefix = ''
|
||||
else:
|
||||
assert found == '{'
|
||||
if recursion_level == 0 and len(code) > start and code[start] == '{':
|
||||
# This is a {{ escape.
|
||||
added_prefix = prefix + '{{'
|
||||
start += 1
|
||||
continue
|
||||
|
||||
recursion_level += 1
|
||||
yield tok(found, prefix=prefix)
|
||||
added_prefix = ''
|
||||
|
||||
expression = ''
|
||||
squared_count = 0
|
||||
curly_count = 0
|
||||
while True:
|
||||
expr_match = _compiled_in_expr.match(code, start)
|
||||
expression += expr_match.group(1)
|
||||
found = expr_match.group(2)
|
||||
start = expr_match.end()
|
||||
|
||||
if found == '{':
|
||||
curly_count += 1
|
||||
expression += found
|
||||
elif found == '}' and curly_count > 0:
|
||||
curly_count -= 1
|
||||
expression += found
|
||||
elif found == '[':
|
||||
squared_count += 1
|
||||
expression += found
|
||||
elif found == ']':
|
||||
# Use a max function here, because the Python code might
|
||||
# just have syntax errors.
|
||||
squared_count = max(0, squared_count - 1)
|
||||
expression += found
|
||||
elif found == ':' and (squared_count or curly_count):
|
||||
expression += found
|
||||
elif found in ('"', "'"):
|
||||
search = found
|
||||
if len(code) > start + 1 and \
|
||||
code[start] == found == code[start+1]:
|
||||
search *= 3
|
||||
start += 2
|
||||
|
||||
index = code.find(search, start)
|
||||
if index == -1:
|
||||
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
|
||||
yield tok(
|
||||
found + code[start:],
|
||||
type=TokenNamespace.UNTERMINATED_STRING,
|
||||
)
|
||||
start = len(code)
|
||||
break
|
||||
expression += found + code[start:index+1]
|
||||
start = index + 1
|
||||
elif found == '!' and len(code) > start and code[start] == '=':
|
||||
# This is a python `!=` and not a conversion.
|
||||
expression += found
|
||||
else:
|
||||
yield tok(expression, type=TokenNamespace.PYTHON_EXPR)
|
||||
if found:
|
||||
yield tok(found)
|
||||
break
|
||||
|
||||
if found == '!':
|
||||
conversion_match = _compiled_conversion.match(code, start)
|
||||
found = conversion_match.group(2)
|
||||
start = conversion_match.end()
|
||||
yield tok(conversion_match.group(1), type=TokenNamespace.CONVERSION)
|
||||
if found:
|
||||
yield tok(found)
|
||||
if found == '}':
|
||||
recursion_level -= 1
|
||||
|
||||
# We don't need to handle everything after ':', because that is
|
||||
# basically new tokens.
|
||||
|
||||
yield tok('', type=TokenNamespace.ENDMARKER, prefix=prefix)
|
||||
|
||||
|
||||
class Parser(parser.BaseParser):
|
||||
def parse(self, tokens):
|
||||
node = super(Parser, self).parse(tokens)
|
||||
if isinstance(node, self.default_leaf): # Is an endmarker.
|
||||
# If there's no curly braces we get back a non-module. We always
|
||||
# want an fstring.
|
||||
node = self.default_node('fstring', [node])
|
||||
|
||||
return node
|
||||
|
||||
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
|
||||
# TODO this is so ugly.
|
||||
leaf_type = TokenNamespace.token_map[type].lower()
|
||||
return TypedLeaf(leaf_type, value, start_pos, prefix)
|
||||
|
||||
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||
add_token_callback):
|
||||
if not self._error_recovery:
|
||||
return super(Parser, self).error_recovery(
|
||||
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||
add_token_callback
|
||||
)
|
||||
|
||||
token_type = TokenNamespace.token_map[typ].lower()
|
||||
if len(stack) == 1:
|
||||
error_leaf = ErrorLeaf(token_type, value, start_pos, prefix)
|
||||
stack[0][2][1].append(error_leaf)
|
||||
else:
|
||||
dfa, state, (type_, nodes) = stack[1]
|
||||
stack[0][2][1].append(ErrorNode(nodes))
|
||||
stack[1:] = []
|
||||
|
||||
add_token_callback(typ, value, start_pos, prefix)
|
||||
@@ -119,7 +119,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [listmaker] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
'`' testlist1 '`' |
|
||||
NAME | NUMBER | STRING+)
|
||||
NAME | NUMBER | strings)
|
||||
strings: STRING+
|
||||
listmaker: test ( list_for | (',' test)* [','] )
|
||||
# Dave: Renamed testlist_gexpr to testlist_comp, because in 2.7+ this is the
|
||||
# default. It's more consistent like this.
|
||||
|
||||
@@ -104,7 +104,8 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [listmaker] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
'`' testlist1 '`' |
|
||||
NAME | NUMBER | STRING+)
|
||||
NAME | NUMBER | strings)
|
||||
strings: STRING+
|
||||
listmaker: test ( list_for | (',' test)* [','] )
|
||||
testlist_comp: test ( comp_for | (',' test)* [','] )
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
|
||||
@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
strings: STRING+
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
|
||||
@@ -103,7 +103,8 @@ power: atom trailer* ['**' factor]
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
strings: STRING+
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
|
||||
@@ -110,7 +110,8 @@ atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
strings: STRING+
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
|
||||
@@ -108,7 +108,7 @@ atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
@@ -148,3 +148,10 @@ encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist

strings: (STRING | fstring)+
fstring: FSTRING_START fstring_content* FSTRING_END
fstring_content: FSTRING_STRING | fstring_expr
fstring_conversion: '!' NAME
fstring_expr: '{' testlist_comp [ fstring_conversion ] [ fstring_format_spec ] '}'
fstring_format_spec: ':' fstring_content*
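These rules fold f-strings into the regular Python grammar; a quick sketch of inspecting the resulting nodes (grammar version 3.6 assumed, node names follow the rules above, the exact nesting is illustrative):

    import parso

    module = parso.parse('f"hello {name!r:>10}"\n', version='3.6')

    def walk(node, indent=0):
        # Leaves have no children; print every node type in the subtree.
        print(' ' * indent + node.type)
        for child in getattr(node, 'children', []):
            walk(child, indent + 2)

    walk(module)   # expect fstring, fstring_expr, fstring_conversion, fstring_format_spec nodes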
157
parso/python/grammar37.txt
Normal file
157
parso/python/grammar37.txt
Normal file
@@ -0,0 +1,157 @@
|
||||
# Grammar for Python
|
||||
|
||||
# NOTE WELL: You should also follow all the steps listed at
|
||||
# https://docs.python.org/devguide/grammar.html
|
||||
|
||||
# Start symbols for the grammar:
|
||||
# single_input is a single interactive statement;
|
||||
# file_input is a module or sequence of commands read from an input file;
|
||||
# eval_input is the input for the eval() functions.
|
||||
# NB: compound_stmt in single_input is followed by extra NEWLINE!
|
||||
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
|
||||
file_input: (NEWLINE | stmt)* ENDMARKER
|
||||
eval_input: testlist NEWLINE* ENDMARKER
|
||||
decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
|
||||
decorators: decorator+
|
||||
decorated: decorators (classdef | funcdef | async_funcdef)
|
||||
|
||||
# NOTE: Francisco Souza/Reinoud Elhorst, using ASYNC/'await' keywords instead of
|
||||
# skipping python3.5+ compatibility, in favour of 3.7 solution
|
||||
async_funcdef: 'async' funcdef
|
||||
funcdef: 'def' NAME parameters ['->' test] ':' suite
|
||||
|
||||
parameters: '(' [typedargslist] ')'
|
||||
typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
|
||||
'*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [',']]]
|
||||
| '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
|
||||
| '**' tfpdef [','])
|
||||
tfpdef: NAME [':' test]
|
||||
varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
|
||||
'*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']]]
|
||||
| '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
|
||||
| '**' vfpdef [',']
|
||||
)
|
||||
vfpdef: NAME
|
||||
|
||||
stmt: simple_stmt | compound_stmt
|
||||
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
|
||||
small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
|
||||
import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
|
||||
expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
|
||||
('=' (yield_expr|testlist_star_expr))*)
|
||||
annassign: ':' test ['=' test]
|
||||
testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
|
||||
augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
|
||||
'<<=' | '>>=' | '**=' | '//=')
|
||||
# For normal and annotated assignments, additional restrictions enforced by the interpreter
|
||||
del_stmt: 'del' exprlist
|
||||
pass_stmt: 'pass'
|
||||
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
|
||||
break_stmt: 'break'
|
||||
continue_stmt: 'continue'
|
||||
return_stmt: 'return' [testlist]
|
||||
yield_stmt: yield_expr
|
||||
raise_stmt: 'raise' [test ['from' test]]
|
||||
import_stmt: import_name | import_from
|
||||
import_name: 'import' dotted_as_names
|
||||
# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
|
||||
import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
|
||||
'import' ('*' | '(' import_as_names ')' | import_as_names))
|
||||
import_as_name: NAME ['as' NAME]
|
||||
dotted_as_name: dotted_name ['as' NAME]
|
||||
import_as_names: import_as_name (',' import_as_name)* [',']
|
||||
dotted_as_names: dotted_as_name (',' dotted_as_name)*
|
||||
dotted_name: NAME ('.' NAME)*
|
||||
global_stmt: 'global' NAME (',' NAME)*
|
||||
nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
|
||||
assert_stmt: 'assert' test [',' test]
|
||||
|
||||
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
|
||||
async_stmt: 'async' (funcdef | with_stmt | for_stmt)
|
||||
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
|
||||
while_stmt: 'while' test ':' suite ['else' ':' suite]
|
||||
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
|
||||
try_stmt: ('try' ':' suite
|
||||
((except_clause ':' suite)+
|
||||
['else' ':' suite]
|
||||
['finally' ':' suite] |
|
||||
'finally' ':' suite))
|
||||
with_stmt: 'with' with_item (',' with_item)* ':' suite
|
||||
with_item: test ['as' expr]
|
||||
# NB compile.c makes sure that the default except clause is last
|
||||
except_clause: 'except' [test ['as' NAME]]
|
||||
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
|
||||
|
||||
test: or_test ['if' or_test 'else' test] | lambdef
|
||||
test_nocond: or_test | lambdef_nocond
|
||||
lambdef: 'lambda' [varargslist] ':' test
|
||||
lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
|
||||
or_test: and_test ('or' and_test)*
|
||||
and_test: not_test ('and' not_test)*
|
||||
not_test: 'not' not_test | comparison
|
||||
comparison: expr (comp_op expr)*
|
||||
# <> isn't actually a valid comparison operator in Python. It's here for the
|
||||
# sake of a __future__ import described in PEP 401 (which really works :-)
|
||||
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
|
||||
star_expr: '*' expr
|
||||
expr: xor_expr ('|' xor_expr)*
|
||||
xor_expr: and_expr ('^' and_expr)*
|
||||
and_expr: shift_expr ('&' shift_expr)*
|
||||
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
|
||||
arith_expr: term (('+'|'-') term)*
|
||||
term: factor (('*'|'@'|'/'|'%'|'//') factor)*
|
||||
factor: ('+'|'-'|'~') factor | power
|
||||
power: atom_expr ['**' factor]
|
||||
atom_expr: ['await'] atom trailer*
|
||||
atom: ('(' [yield_expr|testlist_comp] ')' |
|
||||
'[' [testlist_comp] ']' |
|
||||
'{' [dictorsetmaker] '}' |
|
||||
NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
|
||||
testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
|
||||
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
|
||||
subscriptlist: subscript (',' subscript)* [',']
|
||||
subscript: test | [test] ':' [test] [sliceop]
|
||||
sliceop: ':' [test]
|
||||
exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
|
||||
testlist: test (',' test)* [',']
|
||||
dictorsetmaker: ( ((test ':' test | '**' expr)
|
||||
(comp_for | (',' (test ':' test | '**' expr))* [','])) |
|
||||
((test | star_expr)
|
||||
(comp_for | (',' (test | star_expr))* [','])) )
|
||||
|
||||
classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
|
||||
|
||||
arglist: argument (',' argument)* [',']
|
||||
|
||||
# The reason that keywords are test nodes instead of NAME is that using NAME
|
||||
# results in an ambiguity. ast.c makes sure it's a NAME.
|
||||
# "test '=' test" is really "keyword '=' test", but we have no such token.
|
||||
# These need to be in a single rule to avoid grammar that is ambiguous
|
||||
# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
|
||||
# we explicitly match '*' here, too, to give it proper precedence.
|
||||
# Illegal combinations and orderings are blocked in ast.c:
|
||||
# multiple (test comp_for) arguments are blocked; keyword unpackings
|
||||
# that precede iterable unpackings are blocked; etc.
|
||||
argument: ( test [comp_for] |
|
||||
test '=' test |
|
||||
'**' test |
|
||||
'*' test )
|
||||
|
||||
comp_iter: comp_for | comp_if
|
||||
comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
|
||||
comp_if: 'if' test_nocond [comp_iter]
|
||||
|
||||
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||
encoding_decl: NAME
|
||||
|
||||
yield_expr: 'yield' [yield_arg]
|
||||
yield_arg: 'from' test | testlist
|
||||
|
||||
strings: (STRING | fstring)+
|
||||
fstring: FSTRING_START fstring_content* FSTRING_END
|
||||
fstring_content: FSTRING_STRING | fstring_expr
|
||||
fstring_conversion: '!' NAME
|
||||
fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
|
||||
fstring_format_spec: ':' fstring_content*
|
||||
@@ -1,6 +1,7 @@
|
||||
from parso.python import tree
|
||||
from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER,
|
||||
STRING, tok_name, NAME)
|
||||
STRING, tok_name, NAME, FSTRING_STRING,
|
||||
FSTRING_START, FSTRING_END)
|
||||
from parso.parser import BaseParser
|
||||
from parso.pgen2.parse import token_to_ilabel
|
||||
|
||||
@@ -50,6 +51,17 @@ class Parser(BaseParser):
|
||||
}
|
||||
default_node = tree.PythonNode
|
||||
|
||||
# Names/Keywords are handled separately
|
||||
_leaf_map = {
|
||||
STRING: tree.String,
|
||||
NUMBER: tree.Number,
|
||||
NEWLINE: tree.Newline,
|
||||
ENDMARKER: tree.EndMarker,
|
||||
FSTRING_STRING: tree.FStringString,
|
||||
FSTRING_START: tree.FStringStart,
|
||||
FSTRING_END: tree.FStringEnd,
|
||||
}
|
||||
|
||||
def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
|
||||
super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
|
||||
|
||||
@@ -121,16 +133,8 @@ class Parser(BaseParser):
|
||||
return tree.Keyword(value, start_pos, prefix)
|
||||
else:
|
||||
return tree.Name(value, start_pos, prefix)
|
||||
elif type == STRING:
|
||||
return tree.String(value, start_pos, prefix)
|
||||
elif type == NUMBER:
|
||||
return tree.Number(value, start_pos, prefix)
|
||||
elif type == NEWLINE:
|
||||
return tree.Newline(value, start_pos, prefix)
|
||||
elif type == ENDMARKER:
|
||||
return tree.EndMarker(value, start_pos, prefix)
|
||||
else:
|
||||
return tree.Operator(value, start_pos, prefix)
|
||||
|
||||
return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
|
||||
|
||||
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||
add_token_callback):
|
||||
|
||||
@@ -145,7 +145,7 @@ class BackslashNode(IndentationNode):
|
||||
|
||||
|
||||
def _is_magic_name(name):
|
||||
return name.value.startswith('__') and name.value.startswith('__')
|
||||
return name.value.startswith('__') and name.value.endswith('__')
|
||||
|
||||
|
||||
class PEP8Normalizer(ErrorFinder):
|
||||
|
||||
@@ -32,6 +32,14 @@ if py_version < 35:
|
||||
ERROR_DEDENT = next(_counter)
|
||||
tok_name[ERROR_DEDENT] = 'ERROR_DEDENT'
|
||||
|
||||
FSTRING_START = next(_counter)
|
||||
tok_name[FSTRING_START] = 'FSTRING_START'
|
||||
FSTRING_END = next(_counter)
|
||||
tok_name[FSTRING_END] = 'FSTRING_END'
|
||||
FSTRING_STRING = next(_counter)
|
||||
tok_name[FSTRING_STRING] = 'FSTRING_STRING'
|
||||
EXCLAMATION = next(_counter)
|
||||
tok_name[EXCLAMATION] = 'EXCLAMATION'
|
||||
|
||||
# Map from operator to number (since tokenize doesn't do this)
|
||||
|
||||
@@ -84,6 +92,7 @@ opmap_raw = """\
|
||||
//= DOUBLESLASHEQUAL
|
||||
-> RARROW
|
||||
... ELLIPSIS
|
||||
! EXCLAMATION
|
||||
"""
|
||||
|
||||
opmap = {}
|
||||
|
||||
@@ -20,14 +20,15 @@ from codecs import BOM_UTF8
|
||||
|
||||
from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap,
|
||||
NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT,
|
||||
ERROR_DEDENT)
|
||||
ERROR_DEDENT, FSTRING_STRING, FSTRING_START,
|
||||
FSTRING_END)
|
||||
from parso._compatibility import py_version
|
||||
from parso.utils import split_lines
|
||||
|
||||
|
||||
TokenCollection = namedtuple(
|
||||
'TokenCollection',
|
||||
'pseudo_token single_quoted triple_quoted endpats always_break_tokens',
|
||||
'pseudo_token single_quoted triple_quoted endpats fstring_pattern_map always_break_tokens',
|
||||
)
|
||||
|
||||
BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
|
||||
@@ -52,32 +53,35 @@ def group(*choices, **kwargs):
|
||||
return start + '|'.join(choices) + ')'
|
||||
|
||||
|
||||
def any(*choices):
|
||||
return group(*choices) + '*'
|
||||
|
||||
|
||||
def maybe(*choices):
|
||||
return group(*choices) + '?'
|
||||
|
||||
|
||||
# Return the empty string, plus all of the valid string prefixes.
|
||||
def _all_string_prefixes(version_info):
|
||||
def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False):
|
||||
def different_case_versions(prefix):
|
||||
for s in _itertools.product(*[(c, c.upper()) for c in prefix]):
|
||||
yield ''.join(s)
|
||||
# The valid string prefixes. Only contain the lower case versions,
|
||||
# and don't contain any permuations (include 'fr', but not
|
||||
# 'rf'). The various permutations will be generated.
|
||||
_valid_string_prefixes = ['b', 'r', 'u']
|
||||
valid_string_prefixes = ['b', 'r', 'u']
|
||||
if version_info >= (3, 0):
|
||||
_valid_string_prefixes.append('br')
|
||||
valid_string_prefixes.append('br')
|
||||
|
||||
if version_info >= (3, 6):
|
||||
_valid_string_prefixes += ['f', 'fr']
|
||||
result = set([''])
|
||||
if version_info >= (3, 6) and include_fstring:
|
||||
f = ['f', 'fr']
|
||||
if only_fstring:
|
||||
valid_string_prefixes = f
|
||||
result = set()
|
||||
else:
|
||||
valid_string_prefixes += f
|
||||
elif only_fstring:
|
||||
return set()
|
||||
|
||||
# if we add binary f-strings, add: ['fb', 'fbr']
|
||||
result = set([''])
|
||||
for prefix in _valid_string_prefixes:
|
||||
for prefix in valid_string_prefixes:
|
||||
for t in _itertools.permutations(prefix):
|
||||
# create a list with upper and lower versions of each
|
||||
# character
|
||||
@@ -102,6 +106,10 @@ def _get_token_collection(version_info):
|
||||
return result
|
||||
|
||||
|
||||
fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
|
||||
fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
|
||||
|
||||
|
||||
def _create_token_collection(version_info):
|
||||
# Note: we use unicode matching for names ("\w") but ascii matching for
|
||||
# number literals.
|
||||
@@ -141,6 +149,9 @@ def _create_token_collection(version_info):
|
||||
# StringPrefix can be the empty string (making it optional).
|
||||
possible_prefixes = _all_string_prefixes(version_info)
|
||||
StringPrefix = group(*possible_prefixes)
|
||||
StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True))
|
||||
fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True)
|
||||
FStringStart = group(*fstring_prefixes)
|
||||
|
||||
# Tail end of ' string.
|
||||
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
|
||||
@@ -150,14 +161,14 @@ def _create_token_collection(version_info):
|
||||
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
|
||||
# Tail end of """ string.
|
||||
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
|
||||
Triple = group(StringPrefix + "'''", StringPrefix + '"""')
|
||||
Triple = group(StringPrefixWithF + "'''", StringPrefixWithF + '"""')
|
||||
|
||||
# Because of leftmost-then-longest match semantics, be sure to put the
|
||||
# longest operators first (e.g., if = came before ==, == would get
|
||||
# recognized as two instances of =).
|
||||
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
|
||||
Operator = group(r"\*\*=?", r">>=?", r"<<=?",
|
||||
r"//=?", r"->",
|
||||
r"[+\-*/%&@`|^=<>]=?",
|
||||
r"[+\-*/%&@`|^!=<>]=?",
|
||||
r"~")
|
||||
|
||||
Bracket = '[][(){}]'
|
||||
@@ -174,7 +185,12 @@ def _create_token_collection(version_info):
|
||||
group("'", r'\\\r?\n'),
|
||||
StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
|
||||
group('"', r'\\\r?\n'))
|
||||
PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
|
||||
pseudo_extra_pool = [Comment, Triple]
|
||||
all_quotes = '"', "'", '"""', "'''"
|
||||
if fstring_prefixes:
|
||||
pseudo_extra_pool.append(FStringStart + group(*all_quotes))
|
||||
|
||||
PseudoExtras = group(r'\\\r?\n|\Z', *pseudo_extra_pool)
|
||||
PseudoToken = group(Whitespace, capture=True) + \
|
||||
group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
|
||||
|
||||
@@ -192,18 +208,24 @@ def _create_token_collection(version_info):
|
||||
# including the opening quotes.
|
||||
single_quoted = set()
|
||||
triple_quoted = set()
|
||||
fstring_pattern_map = {}
|
||||
for t in possible_prefixes:
|
||||
for p in (t + '"', t + "'"):
|
||||
single_quoted.add(p)
|
||||
for p in (t + '"""', t + "'''"):
|
||||
triple_quoted.add(p)
|
||||
for quote in '"', "'":
|
||||
single_quoted.add(t + quote)
|
||||
|
||||
for quote in '"""', "'''":
|
||||
triple_quoted.add(t + quote)
|
||||
|
||||
for t in fstring_prefixes:
|
||||
for quote in all_quotes:
|
||||
fstring_pattern_map[t + quote] = quote
|
||||
|
||||
ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
|
||||
'finally', 'while', 'with', 'return')
|
||||
pseudo_token_compiled = _compile(PseudoToken)
|
||||
return TokenCollection(
|
||||
pseudo_token_compiled, single_quoted, triple_quoted, endpats,
|
||||
ALWAYS_BREAK_TOKENS
|
||||
fstring_pattern_map, ALWAYS_BREAK_TOKENS
|
||||
)
|
||||
|
||||
|
||||
@@ -226,12 +248,104 @@ class PythonToken(Token):
|
||||
self._replace(type=self._get_type_name()))
|
||||
|
||||
|
||||
class FStringNode(object):
|
||||
def __init__(self, quote):
|
||||
self.quote = quote
|
||||
self.parentheses_count = 0
|
||||
self.previous_lines = ''
|
||||
self.last_string_start_pos = None
|
||||
# In the syntax there can be multiple format_spec's nested:
|
||||
# {x:{y:3}}
|
||||
self.format_spec_count = 0
|
||||
|
||||
def open_parentheses(self, character):
|
||||
self.parentheses_count += 1
|
||||
|
||||
def close_parentheses(self, character):
|
||||
self.parentheses_count -= 1
|
||||
|
||||
def allow_multiline(self):
|
||||
return len(self.quote) == 3
|
||||
|
||||
def is_in_expr(self):
|
||||
return (self.parentheses_count - self.format_spec_count) > 0
|
||||
|
||||
|
||||
def _check_fstring_ending(fstring_stack, token, from_start=False):
|
||||
fstring_end = float('inf')
|
||||
fstring_index = None
|
||||
for i, node in enumerate(fstring_stack):
|
||||
if from_start:
|
||||
if token.startswith(node.quote):
|
||||
fstring_index = i
|
||||
fstring_end = len(node.quote)
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
end = token.index(node.quote)
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
if fstring_index is None or end < fstring_end:
|
||||
fstring_index = i
|
||||
fstring_end = end
|
||||
return fstring_index, fstring_end
|
||||
|
||||
|
||||
def _find_fstring_string(fstring_stack, line, lnum, pos):
|
||||
tos = fstring_stack[-1]
|
||||
if tos.is_in_expr():
|
||||
return '', pos
|
||||
else:
|
||||
new_pos = pos
|
||||
allow_multiline = tos.allow_multiline()
|
||||
if allow_multiline:
|
||||
match = fstring_string_multi_line.match(line, pos)
|
||||
else:
|
||||
match = fstring_string_single_line.match(line, pos)
|
||||
if match is None:
|
||||
string = tos.previous_lines
|
||||
else:
|
||||
if not tos.previous_lines:
|
||||
tos.last_string_start_pos = (lnum, pos)
|
||||
|
||||
string = match.group(0)
|
||||
for fstring_stack_node in fstring_stack:
|
||||
try:
|
||||
string = string[:string.index(fstring_stack_node.quote)]
|
||||
except ValueError:
|
||||
pass # The string was not found.
|
||||
|
||||
new_pos += len(string)
|
||||
if allow_multiline and string.endswith('\n'):
|
||||
tos.previous_lines += string
|
||||
string = ''
|
||||
else:
|
||||
string = tos.previous_lines + string
|
||||
|
||||
return string, new_pos
|
||||
|
||||
|
||||
def tokenize(code, version_info, start_pos=(1, 0)):
|
||||
"""Generate tokens from a the source code (string)."""
|
||||
lines = split_lines(code, keepends=True)
|
||||
return tokenize_lines(lines, version_info, start_pos=start_pos)
|
||||
|
||||
|
||||
def _print_tokens(func):
|
||||
"""
|
||||
A small helper function to help debug the tokenize_lines function.
|
||||
"""
|
||||
def wrapper(*args, **kwargs):
|
||||
for token in func(*args, **kwargs):
|
||||
print(token)
|
||||
yield token
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
# @_print_tokens
|
||||
def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
"""
|
||||
A heavily modified Python standard library tokenizer.
|
||||
@@ -240,7 +354,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
token. This idea comes from lib2to3. The prefix contains all information
|
||||
that is irrelevant for the parser like newlines in parentheses or comments.
|
||||
"""
|
||||
pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
|
||||
pseudo_token, single_quoted, triple_quoted, endpats, fstring_pattern_map, always_break_tokens, = \
|
||||
_get_token_collection(version_info)
|
||||
paren_level = 0 # count parentheses
|
||||
indents = [0]
|
||||
@@ -257,6 +371,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
additional_prefix = ''
|
||||
first = True
|
||||
lnum = start_pos[0] - 1
|
||||
fstring_stack = []
|
||||
for line in lines: # loop over lines in stream
|
||||
lnum += 1
|
||||
pos = 0
|
||||
@@ -287,6 +402,37 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
continue
|
||||
|
||||
while pos < max:
|
||||
if fstring_stack:
|
||||
string, pos = _find_fstring_string(fstring_stack, line, lnum, pos)
|
||||
if string:
|
||||
yield PythonToken(
|
||||
FSTRING_STRING, string,
|
||||
fstring_stack[-1].last_string_start_pos,
|
||||
# Never has a prefix because it can start anywhere and
|
||||
# include whitespace.
|
||||
prefix=''
|
||||
)
|
||||
fstring_stack[-1].previous_lines = ''
|
||||
continue
|
||||
|
||||
if pos == max:
|
||||
break
|
||||
|
||||
rest = line[pos:]
|
||||
fstring_index, end = _check_fstring_ending(fstring_stack, rest, from_start=True)
|
||||
|
||||
if fstring_index is not None:
|
||||
yield PythonToken(
|
||||
FSTRING_END,
|
||||
fstring_stack[fstring_index].quote,
|
||||
(lnum, pos),
|
||||
prefix=additional_prefix,
|
||||
)
|
||||
additional_prefix = ''
|
||||
del fstring_stack[fstring_index:]
|
||||
pos += end
|
||||
continue
|
||||
|
||||
pseudomatch = pseudo_token.match(line, pos)
|
||||
if not pseudomatch: # scan for tokens
|
||||
txt = line[pos:]
|
||||
@@ -311,10 +457,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
|
||||
if new_line and initial not in '\r\n#':
|
||||
new_line = False
|
||||
if paren_level == 0:
|
||||
if paren_level == 0 and not fstring_stack:
|
||||
i = 0
|
||||
while line[i] == '\f':
|
||||
i += 1
|
||||
# TODO don't we need to change spos as well?
|
||||
start -= 1
|
||||
if start > indents[-1]:
|
||||
yield PythonToken(INDENT, '', spos, '')
|
||||
@@ -326,11 +473,33 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
yield PythonToken(DEDENT, '', spos, '')
|
||||
indents.pop()
|
||||
|
||||
if fstring_stack:
|
||||
fstring_index, end = _check_fstring_ending(fstring_stack, token)
|
||||
if fstring_index is not None:
|
||||
if end != 0:
|
||||
yield PythonToken(ERRORTOKEN, token[:end], spos, prefix)
|
||||
prefix = ''
|
||||
|
||||
yield PythonToken(
|
||||
FSTRING_END,
|
||||
fstring_stack[fstring_index].quote,
|
||||
(lnum, spos[1] + 1),
|
||||
prefix=prefix
|
||||
)
|
||||
del fstring_stack[fstring_index:]
|
||||
pos -= len(token) - end
|
||||
continue
|
||||
|
||||
if (initial in numchars or # ordinary number
|
||||
(initial == '.' and token != '.' and token != '...')):
|
||||
yield PythonToken(NUMBER, token, spos, prefix)
|
||||
elif initial in '\r\n':
|
||||
if not new_line and paren_level == 0:
|
||||
if any(not f.allow_multiline() for f in fstring_stack):
|
||||
# Would use fstring_stack.clear, but that's not available
|
||||
# in Python 2.
|
||||
fstring_stack[:] = []
|
||||
|
||||
if not new_line and paren_level == 0 and not fstring_stack:
|
||||
yield PythonToken(NEWLINE, token, spos, prefix)
|
||||
else:
|
||||
additional_prefix = prefix + token
|
||||
@@ -362,8 +531,12 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
break
|
||||
else: # ordinary string
|
||||
yield PythonToken(STRING, token, spos, prefix)
|
||||
elif token in fstring_pattern_map: # The start of an fstring.
|
||||
fstring_stack.append(FStringNode(fstring_pattern_map[token]))
|
||||
yield PythonToken(FSTRING_START, token, spos, prefix)
|
||||
elif is_identifier(initial): # ordinary name
|
||||
if token in always_break_tokens:
|
||||
fstring_stack[:] = []
|
||||
paren_level = 0
|
||||
while True:
|
||||
indent = indents.pop()
|
||||
@@ -378,9 +551,18 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
|
||||
break
|
||||
else:
|
||||
if token in '([{':
|
||||
paren_level += 1
|
||||
if fstring_stack:
|
||||
fstring_stack[-1].open_parentheses(token)
|
||||
else:
|
||||
paren_level += 1
|
||||
elif token in ')]}':
|
||||
paren_level -= 1
|
||||
if fstring_stack:
|
||||
fstring_stack[-1].close_parentheses(token)
|
||||
else:
|
||||
paren_level -= 1
|
||||
elif token == ':' and fstring_stack \
|
||||
and fstring_stack[-1].parentheses_count == 1:
|
||||
fstring_stack[-1].format_spec_count += 1
|
||||
|
||||
try:
|
||||
# This check is needed in any case to check if it's a valid
|
||||
|
||||
@@ -1,16 +1,18 @@
|
||||
"""
|
||||
If you know what an syntax tree is, you'll see that this module is pretty much
|
||||
that. The classes represent syntax elements like functions and imports.
|
||||
This is the syntax tree for Python syntaxes (2 & 3). The classes represent
|
||||
syntax elements like functions and imports.
|
||||
|
||||
This is the "business logic" part of the parser. There's a lot of logic here
|
||||
that makes it easier for Jedi (and other libraries) to deal with a Python syntax
|
||||
tree.
|
||||
All of the nodes can be traced back to the `Python grammar file
|
||||
<https://docs.python.org/3/reference/grammar.html>`_. If you want to know how
|
||||
a tree is structured, just analyse that file (for each Python version it's a
|
||||
bit different).
|
||||
|
||||
By using `get_code` on a module, you can get back the 1-to-1 representation of
|
||||
the input given to the parser. This is important if you are using refactoring.
|
||||
There's a lot of logic here that makes it easier for Jedi (and other libraries)
|
||||
to deal with a Python syntax tree.
|
||||
|
||||
The easiest way to play with this module is to use :class:`parsing.Parser`.
|
||||
:attr:`parsing.Parser.module` holds an instance of :class:`Module`:
|
||||
By using :py:meth:`parso.tree.NodeOrLeaf.get_code` on a module, you can get
|
||||
back the 1-to-1 representation of the input given to the parser. This is
|
||||
important if you want to refactor a parser tree.
|
||||
|
||||
>>> from parso import parse
|
||||
>>> parser = parse('import os')
|
||||
@@ -23,6 +25,21 @@ Any subclasses of :class:`Scope`, including :class:`Module` has an attribute
|
||||
|
||||
>>> list(module.iter_imports())
|
||||
[<ImportName: import os@1,0>]
|
||||
|
||||
Changes to the Python Grammar
|
||||
-----------------------------
|
||||
|
||||
A few things have changed when looking at Python grammar files:
|
||||
|
||||
- :class:`Param` does not exist in Python grammar files. It is essentially a
|
||||
part of a ``parameters`` node. |parso| splits it up to make it easier to
|
||||
analyse parameters. However this just makes it easier to deal with the syntax
|
||||
tree, it doesn't actually change the valid syntax.
|
||||
- A few nodes like `lambdef` and `lambdef_nocond` have been merged in the
|
||||
syntax tree to make it easier to do deal with them.
|
||||
|
||||
Parser Tree Classes
|
||||
-------------------
|
||||
"""
|
||||
|
||||
import re
|
||||
@@ -79,6 +96,10 @@ class PythonMixin(object):
|
||||
__slots__ = ()
|
||||
|
||||
def get_name_of_position(self, position):
|
||||
"""
|
||||
Given a (line, column) tuple, returns a :py:class:`Name` or ``None`` if
|
||||
there is no name at that position.
|
||||
"""
|
||||
for c in self.children:
|
||||
if isinstance(c, Leaf):
|
||||
if c.type == 'name' and c.start_pos <= position <= c.end_pos:
|
||||
@@ -97,6 +118,9 @@ class PythonLeaf(PythonMixin, Leaf):
|
||||
return split_prefix(self, self.get_start_pos_of_prefix())
|
||||
|
||||
def get_start_pos_of_prefix(self):
|
||||
"""
|
||||
Basically calls :py:meth:`parso.tree.NodeOrLeaf.get_start_pos_of_prefix`.
|
||||
"""
|
||||
# TODO it is really ugly that we have to override it. Maybe change
|
||||
# indent error leafs somehow? No idea how, though.
|
||||
previous_leaf = self.get_previous_leaf()
|
||||
@@ -190,11 +214,6 @@ class Name(_LeafWithoutNewlines):
|
||||
return node
|
||||
return None
|
||||
|
||||
if type_ in ():
|
||||
if self in node.get_defined_names():
|
||||
return node
|
||||
return None
|
||||
|
||||
if type_ == 'except_clause':
|
||||
# TODO in Python 2 this doesn't work correctly. See grammar file.
|
||||
# I think we'll just let it be. Python 2 will be gone in a few
|
||||
@@ -243,6 +262,33 @@ class String(Literal):
|
||||
return match.group(2)[:-len(match.group(1))]
|
||||
|
||||
|
||||
class FStringString(Leaf):
|
||||
"""
|
||||
f-strings contain f-string expressions and normal python strings. These are
|
||||
the string parts of f-strings.
|
||||
"""
|
||||
type = 'fstring_string'
|
||||
__slots__ = ()
|
||||
|
||||
|
||||
class FStringStart(Leaf):
|
||||
"""
|
||||
f-strings contain f-string expressions and normal python strings. These are
|
||||
the string parts of f-strings.
|
||||
"""
|
||||
type = 'fstring_start'
|
||||
__slots__ = ()
|
||||
|
||||
|
||||
class FStringEnd(Leaf):
|
||||
"""
|
||||
f-strings contain f-string expressions and normal python strings. These are
|
||||
the string parts of f-strings.
|
||||
"""
|
||||
type = 'fstring_end'
|
||||
__slots__ = ()
|
||||
|
||||
|
||||
class _StringComparisonMixin(object):
|
||||
def __eq__(self, other):
|
||||
"""
|
||||
@@ -343,7 +389,8 @@ class Module(Scope):
|
||||
|
||||
def _iter_future_import_names(self):
|
||||
"""
|
||||
:return list of str: A list of future import names.
|
||||
:return: A list of future import names.
|
||||
:rtype: list of str
|
||||
"""
|
||||
# In Python it's not allowed to use future imports after the first
|
||||
# actual (non-future) statement. However this is not a linter here,
|
||||
@@ -370,8 +417,8 @@ class Module(Scope):
|
||||
|
||||
def get_used_names(self):
|
||||
"""
|
||||
Returns all the `Name` leafs that exist in this module. Tihs includes
|
||||
both definitions and references of names.
|
||||
Returns all the :class:`Name` leafs that exist in this module. This
|
||||
includes both definitions and references of names.
|
||||
"""
|
||||
if self._used_names is None:
|
||||
# Don't directly use self._used_names to eliminate a lookup.
|
||||
@@ -410,7 +457,7 @@ class ClassOrFunc(Scope):
|
||||
|
||||
def get_decorators(self):
|
||||
"""
|
||||
:return list of Decorator:
|
||||
:rtype: list of :class:`Decorator`
|
||||
"""
|
||||
decorated = self.parent
|
||||
if decorated.type == 'decorated':
|
||||
@@ -425,13 +472,6 @@ class ClassOrFunc(Scope):
|
||||
class Class(ClassOrFunc):
|
||||
"""
|
||||
Used to store the parsed contents of a python class.
|
||||
|
||||
:param name: The Class name.
|
||||
:type name: str
|
||||
:param supers: The super classes of a Class.
|
||||
:type supers: list
|
||||
:param start_pos: The start position (line, column) of the class.
|
||||
:type start_pos: tuple(int, int)
|
||||
"""
|
||||
type = 'classdef'
|
||||
__slots__ = ()
|
||||
@@ -579,6 +619,21 @@ class Function(ClassOrFunc):
|
||||
|
||||
return scan(self.children)
|
||||
|
||||
def iter_raise_stmts(self):
|
||||
"""
|
||||
Returns a generator of `raise_stmt`. Includes raise statements inside try-except blocks
|
||||
"""
|
||||
def scan(children):
|
||||
for element in children:
|
||||
if element.type == 'raise_stmt' \
|
||||
or element.type == 'keyword' and element.value == 'raise':
|
||||
yield element
|
||||
if element.type in _RETURN_STMT_CONTAINERS:
|
||||
for e in scan(element.children):
|
||||
yield e
|
||||
|
||||
return scan(self.children)
|
||||
|
||||
def is_generator(self):
|
||||
"""
|
||||
:return bool: Checks if a function is a generator or not.
|
||||
@@ -1049,7 +1104,7 @@ class Param(PythonBaseNode):
|
||||
@property
|
||||
def annotation(self):
|
||||
"""
|
||||
The default is the test node that appears after `->`. Is `None` in case
|
||||
The default is the test node that appears after `:`. Is `None` in case
|
||||
no annotation is present.
|
||||
"""
|
||||
tfpdef = self._tfpdef()
|
||||
@@ -1133,4 +1188,5 @@ class CompFor(PythonBaseNode):
|
||||
"""
|
||||
Returns the a list of `Name` that the comprehension defines.
|
||||
"""
|
||||
return _defined_names(self.children[1])
|
||||
# allow async for
|
||||
return _defined_names(self.children[self.children.index('for') + 1])
|
||||
|
||||
@@ -4,12 +4,12 @@ from parso._compatibility import utf8_repr, encoding, py_version
|
||||
|
||||
def search_ancestor(node, *node_types):
|
||||
"""
|
||||
Recursively looks at the parents of a node and checks if the type names
|
||||
match.
|
||||
Recursively looks at the parents of a node and returns the first found node
|
||||
that matches node_types. Returns ``None`` if no matching node is found.
|
||||
|
||||
:param node: The node that is looked at.
|
||||
:param node_types: A tuple or a string of type names that are
|
||||
searched for.
|
||||
:param node: The ancestors of this node will be checked.
|
||||
:param node_types: type names that are searched for.
|
||||
:type node_types: tuple of str
|
||||
"""
|
||||
while True:
|
||||
node = node.parent
|
||||
@@ -22,6 +22,10 @@ class NodeOrLeaf(object):
|
||||
The base class for nodes and leaves.
|
||||
"""
|
||||
__slots__ = ()
|
||||
type = None
|
||||
'''
|
||||
The type is a string that typically matches the types of the grammar file.
|
||||
'''
|
||||
|
||||
def get_root_node(self):
|
||||
"""
|
||||
@@ -35,8 +39,8 @@ class NodeOrLeaf(object):
|
||||
|
||||
def get_next_sibling(self):
|
||||
"""
|
||||
The node immediately following the invocant in their parent's children
|
||||
list. If the invocant does not have a next sibling, it is None
|
||||
Returns the node immediately following this node in this parent's
|
||||
children list. If this node does not have a next sibling, it is None
|
||||
"""
|
||||
# Can't use index(); we need to test by identity
|
||||
for i, child in enumerate(self.parent.children):
|
||||
@@ -48,8 +52,8 @@ class NodeOrLeaf(object):
|
||||
|
||||
def get_previous_sibling(self):
|
||||
"""
|
||||
The node/leaf immediately preceding the invocant in their parent's
|
||||
children list. If the invocant does not have a previous sibling, it is
|
||||
Returns the node immediately preceding this node in this parent's
|
||||
children list. If this node does not have a previous sibling, it is
|
||||
None.
|
||||
"""
|
||||
# Can't use index(); we need to test by identity
|
||||
@@ -62,7 +66,7 @@ class NodeOrLeaf(object):
|
||||
def get_previous_leaf(self):
|
||||
"""
|
||||
Returns the previous leaf in the parser tree.
|
||||
Raises an IndexError if it's the first element in the parser tree.
|
||||
Returns `None` if this is the first element in the parser tree.
|
||||
"""
|
||||
node = self
|
||||
while True:
|
||||
@@ -85,7 +89,7 @@ class NodeOrLeaf(object):
|
||||
def get_next_leaf(self):
|
||||
"""
|
||||
Returns the next leaf in the parser tree.
|
||||
Returns `None` if it's the last element in the parser tree.
|
||||
Returns None if this is the last element in the parser tree.
|
||||
"""
|
||||
node = self
|
||||
while True:
|
||||
@@ -135,19 +139,19 @@ class NodeOrLeaf(object):
|
||||
@abstractmethod
|
||||
def get_first_leaf(self):
|
||||
"""
|
||||
Returns the first leaf of a node or itself it's a leaf.
|
||||
Returns the first leaf of a node or itself if this is a leaf.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_last_leaf(self):
|
||||
"""
|
||||
Returns the last leaf of a node or itself it's a leaf.
|
||||
Returns the last leaf of a node or itself if this is a leaf.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_code(self, include_prefix=True):
|
||||
"""
|
||||
Returns the code that was the input of the parser.
|
||||
Returns the code that was input the input for the parser for this node.
|
||||
|
||||
:param include_prefix: Removes the prefix (whitespace and comments) of
|
||||
e.g. a statement.
|
||||
@@ -155,13 +159,27 @@ class NodeOrLeaf(object):
|
||||
|
||||
|
||||
class Leaf(NodeOrLeaf):
|
||||
'''
|
||||
Leafs are basically tokens with a better API. Leafs exactly know where they
|
||||
were defined and what text preceeds them.
|
||||
'''
|
||||
__slots__ = ('value', 'parent', 'line', 'column', 'prefix')
|
||||
|
||||
def __init__(self, value, start_pos, prefix=''):
|
||||
self.value = value
|
||||
'''
|
||||
:py:func:`str` The value of the current token.
|
||||
'''
|
||||
self.start_pos = start_pos
|
||||
self.prefix = prefix
|
||||
'''
|
||||
:py:func:`str` Typically a mixture of whitespace and comments. Stuff
|
||||
that is syntactically irrelevant for the syntax tree.
|
||||
'''
|
||||
self.parent = None
|
||||
'''
|
||||
The parent :class:`BaseNode` of this leaf.
|
||||
'''
|
||||
|
||||
@property
|
||||
def start_pos(self):
|
||||
@@ -219,9 +237,7 @@ class TypedLeaf(Leaf):
|
||||
class BaseNode(NodeOrLeaf):
|
||||
"""
|
||||
The super class for all nodes.
|
||||
|
||||
If you create custom nodes, you will probably want to inherit from this
|
||||
``BaseNode``.
|
||||
A node has children, a type and possibly a parent node.
|
||||
"""
|
||||
__slots__ = ('children', 'parent')
|
||||
type = None
|
||||
@@ -230,7 +246,14 @@ class BaseNode(NodeOrLeaf):
|
||||
for c in children:
|
||||
c.parent = self
|
||||
self.children = children
|
||||
"""
|
||||
A list of :class:`NodeOrLeaf` child nodes.
|
||||
"""
|
||||
self.parent = None
|
||||
'''
|
||||
The parent :class:`BaseNode` of this leaf.
|
||||
None if this is the root node.
|
||||
'''
|
||||
|
||||
@property
|
||||
def start_pos(self):
|
||||
@@ -254,6 +277,14 @@ class BaseNode(NodeOrLeaf):
|
||||
return self._get_code_for_children(self.children, include_prefix)
|
||||
|
||||
def get_leaf_for_position(self, position, include_prefixes=False):
|
||||
"""
|
||||
Get the :py:class:`parso.tree.Leaf` at ``position``
|
||||
|
||||
:param tuple position: A position tuple, row, column. Rows start from 1
|
||||
:param bool include_prefixes: If ``False``, ``None`` will be returned if ``position`` falls
|
||||
on whitespace or comments before a leaf
|
||||
:return: :py:class:`parso.tree.Leaf` at ``position``, or ``None``
|
||||
"""
|
||||
def binary_search(lower, upper):
|
||||
if lower == upper:
|
||||
element = self.children[lower]
|
||||
@@ -307,7 +338,7 @@ class Node(BaseNode):
|
||||
|
||||
class ErrorNode(BaseNode):
|
||||
"""
|
||||
A node that containes valid nodes/leaves that we're follow by a token that
|
||||
A node that contains valid nodes/leaves that we're follow by a token that
|
||||
was invalid. This basically means that the leaf after this node is where
|
||||
Python would mark a syntax error.
|
||||
"""
|
||||
|
||||
@@ -11,10 +11,10 @@ Version = namedtuple('Version', 'major, minor, micro')
|
||||
|
||||
def split_lines(string, keepends=False):
|
||||
r"""
|
||||
A str.splitlines for Python code. In contrast to Python's ``str.splitlines``,
|
||||
Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`,
|
||||
looks at form feeds and other special characters as normal text. Just
|
||||
splits ``\n`` and ``\r\n``.
|
||||
Also different: Returns ``['']`` for an empty string input.
|
||||
Also different: Returns ``[""]`` for an empty string input.
|
||||
|
||||
In Python 2.7 form feeds are used as normal characters when using
|
||||
str.splitlines. However in Python 3 somewhere there was a decision to split
|
||||
@@ -48,9 +48,14 @@ def split_lines(string, keepends=False):
|
||||
return re.split('\n|\r\n', string)
|
||||
|
||||
|
||||
def python_bytes_to_unicode(source, default_encoding='utf-8', errors='strict'):
|
||||
def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'):
|
||||
"""
|
||||
`errors` can be 'strict', 'replace' or 'ignore'.
|
||||
Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a
|
||||
unicode object like in :py:meth:`bytes.decode`.
|
||||
|
||||
:param encoding: See :py:meth:`bytes.decode` documentation.
|
||||
:param errors: See :py:meth:`bytes.decode` documentation. ``errors`` can be
|
||||
``'strict'``, ``'replace'`` or ``'ignore'``.
|
||||
"""
|
||||
def detect_encoding():
|
||||
"""
|
||||
@@ -70,7 +75,7 @@ def python_bytes_to_unicode(source, default_encoding='utf-8', errors='strict'):
|
||||
return possible_encoding.group(1)
|
||||
else:
|
||||
# the default if nothing else has been set -> PEP 263
|
||||
return default_encoding
|
||||
return encoding
|
||||
|
||||
if isinstance(source, unicode):
|
||||
# only cast str/bytes
|
||||
|
||||
2
setup.py
2
setup.py
@@ -14,7 +14,7 @@ readme = open('README.rst').read() + '\n\n' + open('CHANGELOG.rst').read()
|
||||
|
||||
setup(name='parso',
|
||||
version=parso.__version__,
|
||||
description='A Python parser written in Python.',
|
||||
description='A Python Parser',
|
||||
author=__AUTHOR__,
|
||||
author_email=__AUTHOR_EMAIL__,
|
||||
include_package_data=True,
|
||||
|
||||
@@ -141,7 +141,7 @@ FAILING_EXAMPLES = [
|
||||
|
||||
# f-strings
|
||||
'f"{}"',
|
||||
'f"{\\}"',
|
||||
r'f"{\}"',
|
||||
'f"{\'\\\'}"',
|
||||
'f"{#}"',
|
||||
"f'{1!b}'",
|
||||
|
||||
@@ -484,3 +484,21 @@ def test_indentation_issue(differ):
|
||||
|
||||
differ.initialize(code1)
|
||||
differ.parse(code2, parsers=2)
|
||||
|
||||
|
||||
def test_endmarker_newline(differ):
|
||||
code1 = dedent('''\
|
||||
docu = None
|
||||
# some comment
|
||||
result = codet
|
||||
incomplete_dctassign = {
|
||||
"module"
|
||||
|
||||
if "a":
|
||||
x = 3 # asdf
|
||||
''')
|
||||
|
||||
code2 = code1.replace('codet', 'coded')
|
||||
|
||||
differ.initialize(code1)
|
||||
differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
import pytest
|
||||
from textwrap import dedent
|
||||
|
||||
from parso import load_grammar, ParserSyntaxError
|
||||
from parso.python.fstring import tokenize
|
||||
from parso.python.tokenize import tokenize
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def grammar():
|
||||
return load_grammar(language="python-f-string")
|
||||
return load_grammar(version='3.6')
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'code', [
|
||||
'{1}',
|
||||
'{1:}',
|
||||
'',
|
||||
'{1!a}',
|
||||
'{1!a:1}',
|
||||
@@ -26,22 +28,12 @@ def grammar():
|
||||
'{{{1}',
|
||||
'1{{2{{3',
|
||||
'}}',
|
||||
'{:}}}',
|
||||
|
||||
# Invalid, but will be checked, later.
|
||||
'{}',
|
||||
'{1:}',
|
||||
'{:}',
|
||||
'{:1}',
|
||||
'{!:}',
|
||||
'{!}',
|
||||
'{!a}',
|
||||
'{1:{}}',
|
||||
'{1:{:}}',
|
||||
]
|
||||
)
|
||||
def test_valid(code, grammar):
|
||||
fstring = grammar.parse(code, error_recovery=False)
|
||||
code = 'f"""%s"""' % code
|
||||
module = grammar.parse(code, error_recovery=False)
|
||||
fstring = module.children[0]
|
||||
assert fstring.type == 'fstring'
|
||||
assert fstring.get_code() == code
|
||||
|
||||
@@ -52,24 +44,46 @@ def test_valid(code, grammar):
|
||||
'{',
|
||||
'{1!{a}}',
|
||||
'{!{a}}',
|
||||
'{}',
|
||||
'{:}',
|
||||
'{:}}}',
|
||||
'{:1}',
|
||||
'{!:}',
|
||||
'{!}',
|
||||
'{!a}',
|
||||
'{1:{}}',
|
||||
'{1:{:}}',
|
||||
]
|
||||
)
|
||||
def test_invalid(code, grammar):
|
||||
code = 'f"""%s"""' % code
|
||||
with pytest.raises(ParserSyntaxError):
|
||||
grammar.parse(code, error_recovery=False)
|
||||
|
||||
# It should work with error recovery.
|
||||
#grammar.parse(code, error_recovery=True)
|
||||
grammar.parse(code, error_recovery=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
('code', 'start_pos', 'positions'), [
|
||||
('code', 'positions'), [
|
||||
# 2 times 2, 5 because python expr and endmarker.
|
||||
('}{', (2, 3), [(2, 3), (2, 4), (2, 5), (2, 5)]),
|
||||
(' :{ 1 : } ', (1, 0), [(1, 2), (1, 3), (1, 6), (1, 8), (1, 10)]),
|
||||
('\n{\nfoo\n }', (2, 1), [(3, 0), (3, 1), (5, 1), (5, 2)]),
|
||||
('f"}{"', [(1, 0), (1, 2), (1, 3), (1, 4), (1, 5)]),
|
||||
('f" :{ 1 : } "', [(1, 0), (1, 2), (1, 4), (1, 6), (1, 8), (1, 9),
|
||||
(1, 10), (1, 11), (1, 12), (1, 13)]),
|
||||
('f"""\n {\nfoo\n }"""', [(1, 0), (1, 4), (2, 1), (3, 0), (4, 1),
|
||||
(4, 2), (4, 5)]),
|
||||
]
|
||||
)
|
||||
def test_tokenize_start_pos(code, start_pos, positions):
|
||||
tokens = tokenize(code, start_pos)
|
||||
def test_tokenize_start_pos(code, positions):
|
||||
tokens = list(tokenize(code, version_info=(3, 6)))
|
||||
assert positions == [p.start_pos for p in tokens]
|
||||
|
||||
|
||||
def test_roundtrip(grammar):
|
||||
code = dedent("""\
|
||||
f'''s{
|
||||
str.uppe
|
||||
'''
|
||||
""")
|
||||
tree = grammar.parse(code)
|
||||
assert tree.get_code() == code
|
||||
|
||||
@@ -125,6 +125,10 @@ def get_return_stmts(code):
|
||||
return list(parse(code).children[0].iter_return_stmts())
|
||||
|
||||
|
||||
def get_raise_stmts(code, child):
|
||||
return list(parse(code).children[child].iter_raise_stmts())
|
||||
|
||||
|
||||
def test_yields(each_version):
|
||||
y, = get_yield_exprs('def x(): yield', each_version)
|
||||
assert y.value == 'yield'
|
||||
@@ -149,3 +153,30 @@ def test_returns():
|
||||
|
||||
r, = get_return_stmts('def x(): return 1')
|
||||
assert r.type == 'return_stmt'
|
||||
|
||||
|
||||
def test_raises():
|
||||
code = """
|
||||
def single_function():
|
||||
raise Exception
|
||||
def top_function():
|
||||
def inner_function():
|
||||
raise NotImplementedError()
|
||||
inner_function()
|
||||
raise Exception
|
||||
def top_function_three():
|
||||
try:
|
||||
raise NotImplementedError()
|
||||
except NotImplementedError:
|
||||
pass
|
||||
raise Exception
|
||||
"""
|
||||
|
||||
r = get_raise_stmts(code, 0) # Lists in a simple Function
|
||||
assert len(list(r)) == 1
|
||||
|
||||
r = get_raise_stmts(code, 1) # Doesn't Exceptions list in closures
|
||||
assert len(list(r)) == 1
|
||||
|
||||
r = get_raise_stmts(code, 2) # Lists inside try-catch
|
||||
assert len(list(r)) == 2
|
||||
|
||||
@@ -114,6 +114,22 @@ def _get_actual_exception(code):
|
||||
# Python 3.4/3.4 have a bit of a different warning than 3.5/3.6 in
|
||||
# certain places. But in others this error makes sense.
|
||||
return [wanted, "SyntaxError: can't use starred expression here"], line_nr
|
||||
elif wanted == 'SyntaxError: f-string: unterminated string':
|
||||
wanted = 'SyntaxError: EOL while scanning string literal'
|
||||
elif wanted == 'SyntaxError: f-string expression part cannot include a backslash':
|
||||
return [
|
||||
wanted,
|
||||
"SyntaxError: EOL while scanning string literal",
|
||||
"SyntaxError: unexpected character after line continuation character",
|
||||
], line_nr
|
||||
elif wanted == "SyntaxError: f-string: expecting '}'":
|
||||
wanted = 'SyntaxError: EOL while scanning string literal'
|
||||
elif wanted == 'SyntaxError: f-string: empty expression not allowed':
|
||||
wanted = 'SyntaxError: invalid syntax'
|
||||
elif wanted == "SyntaxError: f-string expression part cannot include '#'":
|
||||
wanted = 'SyntaxError: invalid syntax'
|
||||
elif wanted == "SyntaxError: f-string: single '}' is not allowed":
|
||||
wanted = 'SyntaxError: invalid syntax'
|
||||
return [wanted], line_nr
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,8 @@ import pytest
|
||||
from parso._compatibility import py_version
|
||||
from parso.utils import split_lines, parse_version_string
|
||||
from parso.python.token import (
|
||||
NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT)
|
||||
NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT,
|
||||
FSTRING_START)
|
||||
from parso.python import tokenize
|
||||
from parso import parse
|
||||
from parso.python.tokenize import PythonToken
|
||||
@@ -162,8 +163,9 @@ def test_ur_literals():
|
||||
token_list = _get_token_list(literal)
|
||||
typ, result_literal, _, _ = token_list[0]
|
||||
if is_literal:
|
||||
assert typ == STRING
|
||||
assert result_literal == literal
|
||||
if typ != FSTRING_START:
|
||||
assert typ == STRING
|
||||
assert result_literal == literal
|
||||
else:
|
||||
assert typ == NAME
|
||||
|
||||
@@ -175,6 +177,7 @@ def test_ur_literals():
|
||||
# Starting with Python 3.3 this ordering is also possible.
|
||||
if py_version >= 33:
|
||||
check('Rb""')
|
||||
|
||||
# Starting with Python 3.6 format strings where introduced.
|
||||
check('fr""', is_literal=py_version >= 36)
|
||||
check('rF""', is_literal=py_version >= 36)
|
||||
|
||||
9
tox.ini
9
tox.ini
@@ -1,20 +1,21 @@
|
||||
[tox]
|
||||
envlist = py26, py27, py33, py34, py35, py36
|
||||
envlist = py27, py33, py34, py35, py36, py37
|
||||
[testenv]
|
||||
deps =
|
||||
pytest>=3.0.7
|
||||
{env:_PARSO_TEST_PYTEST_DEP:pytest>=3.0.7}
|
||||
# For --lf and --ff.
|
||||
pytest-cache
|
||||
setenv =
|
||||
# https://github.com/tomchristie/django-rest-framework/issues/1957
|
||||
# tox corrupts __pycache__, solution from here:
|
||||
PYTHONDONTWRITEBYTECODE=1
|
||||
py26,py33: _PARSO_TEST_PYTEST_DEP=pytest>=3.0.7,<3.3
|
||||
commands =
|
||||
py.test {posargs:parso test}
|
||||
pytest {posargs:parso test}
|
||||
[testenv:cov]
|
||||
deps =
|
||||
coverage
|
||||
{[testenv]deps}
|
||||
commands =
|
||||
coverage run --source parso -m py.test
|
||||
coverage run --source parso -m pytest
|
||||
coverage report
|
||||
|
||||
Reference in New Issue
Block a user