diff --git a/.gitignore b/.gitignore index 7186cd5..0c817be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ *~ *.sw? *.pyc -.tox .coveralls.yml .coverage /build/ diff --git a/.travis.yml b/.travis.yml index 1487000..c79c1c9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,28 +1,31 @@ dist: xenial language: python python: - - 2.7 - - 3.4 - - 3.5 - 3.6 - 3.7 - 3.8.2 - nightly - - pypy2.7-6.0 - - pypy3.5-6.0 matrix: allow_failures: - python: nightly include: - - python: 3.5 - env: TOXENV=py35-coverage + - python: 3.8 + install: + - 'pip install .[qa]' + script: + # Ignore F401, which are unused imports. flake8 is a primitive tool and is sometimes wrong. + - 'flake8 --extend-ignore F401 parso test/*.py setup.py scripts/' + - mypy parso + - python: 3.8.2 + script: + - 'pip install coverage' + - 'coverage run -m pytest' + - 'coverage report' + after_script: + - | + pip install --quiet coveralls + coveralls install: - - pip install --quiet tox-travis + - pip install .[testing] script: - - tox -after_script: - - | - if [ "${TOXENV%-coverage}" == "$TOXENV" ]; then - pip install --quiet coveralls; - coveralls; - fi + - pytest diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4a08454..9da62f6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,14 @@ Changelog --------- +Unreleased (XXXX-XX-XX) ++++++++++++++++++++++++ + +- Dropped Support for Python 2.7, 3.4, 3.5 +- It's possible to use ``pathlib.Path`` objects now in the API +- The stubs are gone, we are now using annotations +- A lot of smaller refactorings + 0.7.1 (2020-07-24) ++++++++++++++++++ diff --git a/MANIFEST.in b/MANIFEST.in index fb90b35..e54f3ea 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,7 +5,6 @@ include AUTHORS.txt include .coveragerc include conftest.py include pytest.ini -include tox.ini include parso/python/grammar*.txt recursive-include test * recursive-include docs * diff --git a/README.rst b/README.rst index d87f2ef..6a048e6 100644 --- a/README.rst +++ b/README.rst @@ -31,7 +31,7 @@ A simple example: .. 
code-block:: python >>> import parso - >>> module = parso.parse('hello + 1', version="3.6") + >>> module = parso.parse('hello + 1', version="3.9") >>> expr = module.children[0] >>> expr PythonNode(arith_expr, [, , ]) diff --git a/conftest.py b/conftest.py index 65364bf..35a1846 100644 --- a/conftest.py +++ b/conftest.py @@ -2,8 +2,8 @@ import re import tempfile import shutil import logging -import sys import os +from pathlib import Path import pytest @@ -13,8 +13,7 @@ from parso.utils import parse_version_string collect_ignore = ["setup.py"] -VERSIONS_2 = '2.7', -VERSIONS_3 = '3.4', '3.5', '3.6', '3.7', '3.8' +_SUPPORTED_VERSIONS = '3.6', '3.7', '3.8', '3.9', '3.10' @pytest.fixture(scope='session') @@ -30,7 +29,7 @@ def clean_parso_cache(): """ old = cache._default_cache_path tmp = tempfile.mkdtemp(prefix='parso-test-') - cache._default_cache_path = tmp + cache._default_cache_path = Path(tmp) yield cache._default_cache_path = old shutil.rmtree(tmp) @@ -52,18 +51,13 @@ def pytest_generate_tests(metafunc): ids=[c.name for c in cases] ) elif 'each_version' in metafunc.fixturenames: - metafunc.parametrize('each_version', VERSIONS_2 + VERSIONS_3) - elif 'each_py2_version' in metafunc.fixturenames: - metafunc.parametrize('each_py2_version', VERSIONS_2) - elif 'each_py3_version' in metafunc.fixturenames: - metafunc.parametrize('each_py3_version', VERSIONS_3) - elif 'version_ge_py36' in metafunc.fixturenames: - metafunc.parametrize('version_ge_py36', ['3.6', '3.7', '3.8']) + metafunc.parametrize('each_version', _SUPPORTED_VERSIONS) elif 'version_ge_py38' in metafunc.fixturenames: - metafunc.parametrize('version_ge_py38', ['3.8']) + ge38 = set(_SUPPORTED_VERSIONS) - {'3.6', '3.7'} + metafunc.parametrize('version_ge_py38', sorted(ge38)) -class NormalizerIssueCase(object): +class NormalizerIssueCase: """ Static Analysis cases lie in the static_analysis folder. The tests also start with `#!`, like the goto_definition tests. @@ -95,7 +89,7 @@ def pytest_configure(config): #root.addHandler(ch) -class Checker(): +class Checker: def __init__(self, version, is_passing): self.version = version self._is_passing = is_passing @@ -137,37 +131,16 @@ def works_not_in_py(each_version): @pytest.fixture -def works_in_py2(each_version): - return Checker(each_version, each_version.startswith('2')) +def works_in_py(each_version): + return Checker(each_version, True) -@pytest.fixture -def works_ge_py27(each_version): - version_info = parse_version_string(each_version) - return Checker(each_version, version_info >= (2, 7)) - - -@pytest.fixture -def works_ge_py3(each_version): - version_info = parse_version_string(each_version) - return Checker(each_version, version_info >= (3, 0)) - - -@pytest.fixture -def works_ge_py35(each_version): - version_info = parse_version_string(each_version) - return Checker(each_version, version_info >= (3, 5)) - -@pytest.fixture -def works_ge_py36(each_version): - version_info = parse_version_string(each_version) - return Checker(each_version, version_info >= (3, 6)) - @pytest.fixture def works_ge_py38(each_version): version_info = parse_version_string(each_version) return Checker(each_version, version_info >= (3, 8)) + @pytest.fixture def works_ge_py39(each_version): version_info = parse_version_string(each_version) diff --git a/deploy-master.sh b/deploy-master.sh index 876eb1d..79d25f8 100755 --- a/deploy-master.sh +++ b/deploy-master.sh @@ -23,7 +23,7 @@ cd $PROJECT_NAME git checkout $BRANCH # Test first. 
-tox +pytest # Create tag tag=v$(python3 -c "import $PROJECT_NAME; print($PROJECT_NAME.__version__)") diff --git a/docs/conf.py b/docs/conf.py index f1d0d59..b11d4ce 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -43,8 +43,8 @@ source_encoding = 'utf-8' master_doc = 'index' # General information about the project. -project = u'parso' -copyright = u'parso contributors' +project = 'parso' +copyright = 'parso contributors' import parso from parso.utils import version_info @@ -200,8 +200,8 @@ latex_elements = { # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'parso.tex', u'parso documentation', - u'parso contributors', 'manual'), + ('index', 'parso.tex', 'parso documentation', + 'parso contributors', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of @@ -230,8 +230,8 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'parso', u'parso Documentation', - [u'parso contributors'], 1) + ('index', 'parso', 'parso Documentation', + ['parso contributors'], 1) ] # If true, show URL addresses after external links. @@ -244,8 +244,8 @@ man_pages = [ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'parso', u'parso documentation', - u'parso contributors', 'parso', 'Awesome Python autocompletion library.', + ('index', 'parso', 'parso documentation', + 'parso contributors', 'parso', 'Awesome Python autocompletion library.', 'Miscellaneous'), ] @@ -273,7 +273,7 @@ autodoc_default_flags = [] # -- Options for intersphinx module -------------------------------------------- intersphinx_mapping = { - 'http://docs.python.org/': ('https://docs.python.org/3.6', None), + 'http://docs.python.org/': ('https://docs.python.org/3', None), } diff --git a/docs/docs/development.rst b/docs/docs/development.rst index f723162..1332c66 100644 --- a/docs/docs/development.rst +++ b/docs/docs/development.rst @@ -21,18 +21,18 @@ The deprecation process is as follows: Testing ------- -The test suite depends on ``tox`` and ``pytest``:: +The test suite depends on ``pytest``:: - pip install tox pytest + pip install pytest -To run the tests for all supported Python versions:: +To run the tests use the following:: - tox + pytest -If you want to test only a specific Python version (e.g. Python 2.7), it's as +If you want to test only a specific Python version (e.g. Python 3.9), it's as easy as:: - tox -e py27 + python3.9 -m pytest Tests are also run automatically on `Travis CI `_. diff --git a/parso/__init__.py b/parso/__init__.py index f331984..eecb86d 100644 --- a/parso/__init__.py +++ b/parso/__init__.py @@ -13,7 +13,7 @@ Parso consists of a small API to parse Python and analyse the syntax tree. 
A simple example: >>> import parso ->>> module = parso.parse('hello + 1', version="3.6") +>>> module = parso.parse('hello + 1', version="3.9") >>> expr = module.children[0] >>> expr PythonNode(arith_expr, [, , ]) diff --git a/parso/__init__.pyi b/parso/__init__.pyi deleted file mode 100644 index 5f72f07..0000000 --- a/parso/__init__.pyi +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Any, Optional, Union - -from parso.grammar import Grammar as Grammar, load_grammar as load_grammar -from parso.parser import ParserSyntaxError as ParserSyntaxError -from parso.utils import python_bytes_to_unicode as python_bytes_to_unicode, split_lines as split_lines - -__version__: str = ... - -def parse( - code: Optional[Union[str, bytes]], - *, - version: Optional[str] = None, - error_recovery: bool = True, - path: Optional[str] = None, - start_symbol: Optional[str] = None, - cache: bool = False, - diff_cache: bool = False, - cache_path: Optional[str] = None, -) -> Any: ... diff --git a/parso/_compatibility.py b/parso/_compatibility.py index 4c966d6..58b186f 100644 --- a/parso/_compatibility.py +++ b/parso/_compatibility.py @@ -1,101 +1,3 @@ -""" -To ensure compatibility from Python ``2.7`` - ``3.3``, a module has been -created. Clearly there is huge need to use conforming syntax. -""" -import os -import sys import platform -# unicode function -try: - unicode = unicode -except NameError: - unicode = str - is_pypy = platform.python_implementation() == 'PyPy' - - -def use_metaclass(meta, *bases): - """ Create a class with a metaclass. """ - if not bases: - bases = (object,) - return meta("HackClass", bases, {}) - - -try: - encoding = sys.stdout.encoding - if encoding is None: - encoding = 'utf-8' -except AttributeError: - encoding = 'ascii' - - -def u(string): - """Cast to unicode DAMMIT! - Written because Python2 repr always implicitly casts to a string, so we - have to cast back to a unicode (and we know that we always deal with valid - unicode, because we check that in the beginning). - """ - if sys.version_info.major >= 3: - return str(string) - - if not isinstance(string, unicode): - return unicode(str(string), 'UTF-8') - return string - - -try: - # Python 3.3+ - FileNotFoundError = FileNotFoundError -except NameError: - # Python 2.7 (both IOError + OSError) - FileNotFoundError = EnvironmentError -try: - # Python 3.3+ - PermissionError = PermissionError -except NameError: - # Python 2.7 (both IOError + OSError) - PermissionError = EnvironmentError - - -def utf8_repr(func): - """ - ``__repr__`` methods in Python 2 don't allow unicode objects to be - returned. Therefore cast them to utf-8 bytes in this decorator. - """ - def wrapper(self): - result = func(self) - if isinstance(result, unicode): - return result.encode('utf-8') - else: - return result - - if sys.version_info.major >= 3: - return func - else: - return wrapper - - -if sys.version_info < (3, 5): - """ - A super-minimal shim around listdir that behave like - scandir for the information we need. 
- """ - class _DirEntry: - - def __init__(self, name, basepath): - self.name = name - self.basepath = basepath - - @property - def path(self): - return os.path.join(self.basepath, self.name) - - def stat(self): - # won't follow symlinks - return os.lstat(os.path.join(self.basepath, self.name)) - - def scandir(dir): - return [_DirEntry(name, dir) for name in os.listdir(dir)] -else: - from os import scandir diff --git a/parso/cache.py b/parso/cache.py index 8644423..1ced9ac 100644 --- a/parso/cache.py +++ b/parso/cache.py @@ -5,17 +5,11 @@ import hashlib import gc import shutil import platform -import errno import logging import warnings - -try: - import cPickle as pickle -except: - import pickle - -from parso._compatibility import FileNotFoundError, PermissionError, scandir -from parso.file_io import FileIO +import pickle +from pathlib import Path +from typing import Dict, Any LOG = logging.getLogger(__name__) @@ -64,21 +58,19 @@ _VERSION_TAG = '%s-%s%s-%s' % ( """ Short name for distinguish Python implementations and versions. -It's like `sys.implementation.cache_tag` but for Python2 -we generate something similar. See: -http://docs.python.org/3/library/sys.html#sys.implementation +It's a bit similar to `sys.implementation.cache_tag`. +See: http://docs.python.org/3/library/sys.html#sys.implementation """ def _get_default_cache_path(): if platform.system().lower() == 'windows': - dir_ = os.path.join(os.getenv('LOCALAPPDATA') - or os.path.expanduser('~'), 'Parso', 'Parso') + dir_ = Path(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso') elif platform.system().lower() == 'darwin': - dir_ = os.path.join('~', 'Library', 'Caches', 'Parso') + dir_ = Path('~', 'Library', 'Caches', 'Parso') else: - dir_ = os.path.join(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso') - return os.path.expanduser(dir_) + dir_ = Path(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso') + return dir_.expanduser() _default_cache_path = _get_default_cache_path() @@ -93,21 +85,22 @@ On Linux, if environment variable ``$XDG_CACHE_HOME`` is set, _CACHE_CLEAR_THRESHOLD = 60 * 60 * 24 -def _get_cache_clear_lock(cache_path = None): + +def _get_cache_clear_lock_path(cache_path=None): """ The path where the cache lock is stored. Cache lock will prevent continous cache clearing and only allow garbage collection once a day (can be configured in _CACHE_CLEAR_THRESHOLD). """ - cache_path = cache_path or _get_default_cache_path() - return FileIO(os.path.join(cache_path, "PARSO-CACHE-LOCK")) + cache_path = cache_path or _default_cache_path + return cache_path.joinpath("PARSO-CACHE-LOCK") -parser_cache = {} +parser_cache: Dict[str, Any] = {} -class _NodeCacheItem(object): +class _NodeCacheItem: def __init__(self, node, lines, change_time=None): self.node = node self.lines = lines @@ -142,16 +135,9 @@ def load_module(hashed_grammar, file_io, cache_path=None): def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None): cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path) try: - try: - if p_time > os.path.getmtime(cache_path): - # Cache is outdated - return None - except OSError as e: - if e.errno == errno.ENOENT: - # In Python 2 instead of an IOError here we get an OSError. 
- raise FileNotFoundError - else: - raise + if p_time > os.path.getmtime(cache_path): + # Cache is outdated + return None with open(cache_path, 'rb') as f: gc.disable() @@ -225,52 +211,65 @@ def clear_inactive_cache( inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL, ): if cache_path is None: - cache_path = _get_default_cache_path() - if not os.path.exists(cache_path): + cache_path = _default_cache_path + if not cache_path.exists(): return False - for version_path in os.listdir(cache_path): - version_path = os.path.join(cache_path, version_path) - if not os.path.isdir(version_path): + for dirname in os.listdir(cache_path): + version_path = cache_path.joinpath(dirname) + if not version_path.is_dir(): continue - for file in scandir(version_path): - if ( - file.stat().st_atime + _CACHED_FILE_MAXIMUM_SURVIVAL - <= time.time() - ): + for file in os.scandir(version_path): + if file.stat().st_atime + _CACHED_FILE_MAXIMUM_SURVIVAL <= time.time(): try: os.remove(file.path) - except OSError: # silently ignore all failures + except OSError: # silently ignore all failures continue else: return True -def _remove_cache_and_update_lock(cache_path = None): - lock = _get_cache_clear_lock(cache_path=cache_path) - clear_lock_time = lock.get_last_modified() +def _touch(path): + try: + os.utime(path, None) + except FileNotFoundError: + try: + file = open(path, 'a') + file.close() + except (OSError, IOError): # TODO Maybe log this? + return False + return True + + +def _remove_cache_and_update_lock(cache_path=None): + lock_path = _get_cache_clear_lock_path(cache_path=cache_path) + try: + clear_lock_time = os.path.getmtime(lock_path) + except FileNotFoundError: + clear_lock_time = None if ( - clear_lock_time is None # first time + clear_lock_time is None # first time or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time() ): - if not lock._touch(): + if not _touch(lock_path): # First make sure that as few as possible other cleanup jobs also # get started. There is still a race condition but it's probably # not a big problem. 
return False - clear_inactive_cache(cache_path = cache_path) + clear_inactive_cache(cache_path=cache_path) + def _get_hashed_path(hashed_grammar, path, cache_path=None): directory = _get_cache_directory_path(cache_path=cache_path) - file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest() + file_hash = hashlib.sha256(str(path).encode("utf-8")).hexdigest() return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash)) def _get_cache_directory_path(cache_path=None): if cache_path is None: cache_path = _default_cache_path - directory = os.path.join(cache_path, _VERSION_TAG) - if not os.path.exists(directory): + directory = cache_path.joinpath(_VERSION_TAG) + if not directory.exists(): os.makedirs(directory) return directory diff --git a/parso/file_io.py b/parso/file_io.py index 34b41ec..568ce9d 100644 --- a/parso/file_io.py +++ b/parso/file_io.py @@ -1,9 +1,12 @@ import os -from parso._compatibility import FileNotFoundError +from pathlib import Path +from typing import Union -class FileIO(object): - def __init__(self, path): +class FileIO: + def __init__(self, path: Union[os.PathLike, str]): + if isinstance(path, str): + path = Path(path) self.path = path def read(self): # Returns bytes/str @@ -19,20 +22,8 @@ class FileIO(object): """ try: return os.path.getmtime(self.path) - except OSError: - # Might raise FileNotFoundError, OSError for Python 2 - return None - - def _touch(self): - try: - os.utime(self.path, None) except FileNotFoundError: - try: - file = open(self.path, 'a') - file.close() - except (OSError, IOError): # TODO Maybe log this? - return False - return True + return None def __repr__(self): return '%s(%s)' % (self.__class__.__name__, self.path) @@ -40,7 +31,7 @@ class FileIO(object): class KnownContentFileIO(FileIO): def __init__(self, path, content): - super(KnownContentFileIO, self).__init__(path) + super().__init__(path) self._content = content def read(self): diff --git a/parso/grammar.py b/parso/grammar.py index 6233005..43b7be9 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -1,9 +1,12 @@ import hashlib import os +from typing import Generic, TypeVar, Union, Dict, Optional, Any +from pathlib import Path -from parso._compatibility import FileNotFoundError, is_pypy +from parso._compatibility import is_pypy from parso.pgen2 import generate_grammar -from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string +from parso.utils import split_lines, python_bytes_to_unicode, \ + PythonVersionInfo, parse_version_string from parso.python.diff import DiffParser from parso.python.tokenize import tokenize_lines, tokenize from parso.python.token import PythonTokenTypes @@ -13,23 +16,27 @@ from parso.python.parser import Parser as PythonParser from parso.python.errors import ErrorFinderConfig from parso.python import pep8 from parso.file_io import FileIO, KnownContentFileIO -from parso.normalizer import RefactoringNormalizer +from parso.normalizer import RefactoringNormalizer, NormalizerConfig -_loaded_grammars = {} +_loaded_grammars: Dict[str, 'Grammar'] = {} + +_NodeT = TypeVar("_NodeT") -class Grammar(object): +class Grammar(Generic[_NodeT]): """ :py:func:`parso.load_grammar` returns instances of this class. Creating custom none-python grammars by calling this is not supported, yet. - """ - #:param text: A BNF representation of your grammar. 
- _error_normalizer_config = None - _token_namespace = None - _default_normalizer_config = pep8.PEP8NormalizerConfig() - def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None): + :param text: A BNF representation of your grammar. + """ + _start_nonterminal: str + _error_normalizer_config: Optional[ErrorFinderConfig] = None + _token_namespace: Any = None + _default_normalizer_config: NormalizerConfig = pep8.PEP8NormalizerConfig() + + def __init__(self, text: str, *, tokenizer, parser=BaseParser, diff_parser=None): self._pgen_grammar = generate_grammar( text, token_namespace=self._get_token_namespace() @@ -39,7 +46,16 @@ class Grammar(object): self._diff_parser = diff_parser self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest() - def parse(self, code=None, **kwargs): + def parse(self, + code: Union[str, bytes] = None, + *, + error_recovery=True, + path: Union[os.PathLike, str] = None, + start_symbol: str = None, + cache=False, + diff_cache=False, + cache_path: Union[os.PathLike, str] = None, + file_io: FileIO = None) -> _NodeT: """ If you want to parse a Python file you want to start here, most likely. @@ -74,22 +90,14 @@ class Grammar(object): :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a :py:class:`parso.python.tree.Module`. """ - if 'start_pos' in kwargs: - raise TypeError("parse() got an unexpected keyword argument.") - return self._parse(code=code, **kwargs) - - def _parse(self, code=None, error_recovery=True, path=None, - start_symbol=None, cache=False, diff_cache=False, - cache_path=None, file_io=None, start_pos=(1, 0)): - """ - Wanted python3.5 * operator and keyword only arguments. Therefore just - wrap it all. - start_pos here is just a parameter internally used. Might be public - sometime in the future. - """ if code is None and path is None and file_io is None: raise TypeError("Please provide either code or a path.") + if isinstance(path, str): + path = Path(path) + if isinstance(cache_path, str): + cache_path = Path(cache_path) + if start_symbol is None: start_symbol = self._start_nonterminal @@ -98,14 +106,14 @@ class Grammar(object): if file_io is None: if code is None: - file_io = FileIO(path) + file_io = FileIO(path) # type: ignore else: file_io = KnownContentFileIO(path, code) if cache and file_io.path is not None: module_node = load_module(self._hashed, file_io, cache_path=cache_path) if module_node is not None: - return module_node + return module_node # type: ignore if code is None: code = file_io.read() @@ -124,7 +132,7 @@ class Grammar(object): module_node = module_cache_item.node old_lines = module_cache_item.lines if old_lines == lines: - return module_node + return module_node # type: ignore new_node = self._diff_parser( self._pgen_grammar, self._tokenizer, module_node @@ -133,12 +141,12 @@ class Grammar(object): new_lines=lines ) try_to_save_module(self._hashed, file_io, new_node, lines, - # Never pickle in pypy, it's slow as hell. - pickling=cache and not is_pypy, - cache_path=cache_path) - return new_node + # Never pickle in pypy, it's slow as hell. + pickling=cache and not is_pypy, + cache_path=cache_path) + return new_node # type: ignore - tokens = self._tokenizer(lines, start_pos=start_pos) + tokens = self._tokenizer(lines) p = self._parser( self._pgen_grammar, @@ -149,10 +157,10 @@ class Grammar(object): if cache or diff_cache: try_to_save_module(self._hashed, file_io, root_node, lines, - # Never pickle in pypy, it's slow as hell. 
- pickling=cache and not is_pypy, - cache_path=cache_path) - return root_node + # Never pickle in pypy, it's slow as hell. + pickling=cache and not is_pypy, + cache_path=cache_path) + return root_node # type: ignore def _get_token_namespace(self): ns = self._token_namespace @@ -206,8 +214,8 @@ class PythonGrammar(Grammar): _token_namespace = PythonTokenTypes _start_nonterminal = 'file_input' - def __init__(self, version_info, bnf_text): - super(PythonGrammar, self).__init__( + def __init__(self, version_info: PythonVersionInfo, bnf_text: str): + super().__init__( bnf_text, tokenizer=self._tokenize_lines, parser=PythonParser, @@ -216,14 +224,14 @@ class PythonGrammar(Grammar): self.version_info = version_info def _tokenize_lines(self, lines, **kwargs): - return tokenize_lines(lines, self.version_info, **kwargs) + return tokenize_lines(lines, version_info=self.version_info, **kwargs) def _tokenize(self, code): # Used by Jedi. - return tokenize(code, self.version_info) + return tokenize(code, version_info=self.version_info) -def load_grammar(**kwargs): +def load_grammar(*, version: str = None, path: str = None): """ Loads a :py:class:`parso.Grammar`. The default version is the current Python version. @@ -231,30 +239,26 @@ def load_grammar(**kwargs): :param str version: A python version string, e.g. ``version='3.8'``. :param str path: A path to a grammar file """ - def load_grammar(language='python', version=None, path=None): - if language == 'python': - version_info = parse_version_string(version) + version_info = parse_version_string(version) - file = path or os.path.join( - 'python', - 'grammar%s%s.txt' % (version_info.major, version_info.minor) + file = path or os.path.join( + 'python', + 'grammar%s%s.txt' % (version_info.major, version_info.minor) + ) + + global _loaded_grammars + path = os.path.join(os.path.dirname(__file__), file) + try: + return _loaded_grammars[path] + except KeyError: + try: + with open(path) as f: + bnf_text = f.read() + + grammar = PythonGrammar(version_info, bnf_text) + return _loaded_grammars.setdefault(path, grammar) + except FileNotFoundError: + message = "Python version %s.%s is currently not supported." % ( + version_info.major, version_info.minor ) - - global _loaded_grammars - path = os.path.join(os.path.dirname(__file__), file) - try: - return _loaded_grammars[path] - except KeyError: - try: - with open(path) as f: - bnf_text = f.read() - - grammar = PythonGrammar(version_info, bnf_text) - return _loaded_grammars.setdefault(path, grammar) - except FileNotFoundError: - message = "Python version %s.%s is currently not supported." % (version_info.major, version_info.minor) - raise NotImplementedError(message) - else: - raise NotImplementedError("No support for language %s." % language) - - return load_grammar(**kwargs) + raise NotImplementedError(message) diff --git a/parso/grammar.pyi b/parso/grammar.pyi deleted file mode 100644 index e5cd2ea..0000000 --- a/parso/grammar.pyi +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Any, Callable, Generic, Optional, Sequence, TypeVar, Union -from typing_extensions import Literal - -from parso.utils import PythonVersionInfo - -_Token = Any -_NodeT = TypeVar("_NodeT") - -class Grammar(Generic[_NodeT]): - _default_normalizer_config: Optional[Any] = ... - _error_normalizer_config: Optional[Any] = None - _start_nonterminal: str = ... 
- _token_namespace: Optional[str] = None - def __init__( - self, - text: str, - tokenizer: Callable[[Sequence[str], int], Sequence[_Token]], - parser: Any = ..., - diff_parser: Any = ..., - ) -> None: ... - def parse( - self, - code: Union[str, bytes] = ..., - error_recovery: bool = ..., - path: Optional[str] = ..., - start_symbol: Optional[str] = ..., - cache: bool = ..., - diff_cache: bool = ..., - cache_path: Optional[str] = ..., - ) -> _NodeT: ... - -class PythonGrammar(Grammar): - version_info: PythonVersionInfo - def __init__(self, version_info: PythonVersionInfo, bnf_text: str) -> None: ... - -def load_grammar( - language: Literal["python"] = "python", version: Optional[str] = ..., path: str = ... -) -> Grammar: ... diff --git a/parso/normalizer.py b/parso/normalizer.py index 09fde99..a95f029 100644 --- a/parso/normalizer.py +++ b/parso/normalizer.py @@ -1,6 +1,5 @@ from contextlib import contextmanager - -from parso._compatibility import use_metaclass +from typing import Dict, List class _NormalizerMeta(type): @@ -11,9 +10,9 @@ class _NormalizerMeta(type): return new_cls -class Normalizer(use_metaclass(_NormalizerMeta)): - _rule_type_instances = {} - _rule_value_instances = {} +class Normalizer(metaclass=_NormalizerMeta): + _rule_type_instances: Dict[str, List[type]] = {} + _rule_value_instances: Dict[str, List[type]] = {} def __init__(self, grammar, config): self.grammar = grammar @@ -77,7 +76,7 @@ class Normalizer(use_metaclass(_NormalizerMeta)): return True @classmethod - def register_rule(cls, **kwargs): + def register_rule(cls, *, value=None, values=(), type=None, types=()): """ Use it as a class decorator:: @@ -86,10 +85,6 @@ class Normalizer(use_metaclass(_NormalizerMeta)): class MyRule(Rule): error_code = 42 """ - return cls._register_rule(**kwargs) - - @classmethod - def _register_rule(cls, value=None, values=(), type=None, types=()): values = list(values) types = list(types) if value is not None: @@ -110,7 +105,7 @@ class Normalizer(use_metaclass(_NormalizerMeta)): return decorator -class NormalizerConfig(object): +class NormalizerConfig: normalizer_class = Normalizer def create_normalizer(self, grammar): @@ -120,7 +115,7 @@ class NormalizerConfig(object): return self.normalizer_class(grammar, self) -class Issue(object): +class Issue: def __init__(self, node, code, message): self.code = code """ @@ -150,9 +145,9 @@ class Issue(object): return '<%s: %s>' % (self.__class__.__name__, self.code) -class Rule(object): - code = None - message = None +class Rule: + code: int + message: str def __init__(self, normalizer): self._normalizer = normalizer @@ -194,10 +189,10 @@ class RefactoringNormalizer(Normalizer): try: return self._node_to_str_map[node] except KeyError: - return super(RefactoringNormalizer, self).visit(node) + return super().visit(node) def visit_leaf(self, leaf): try: return self._node_to_str_map[leaf] except KeyError: - return super(RefactoringNormalizer, self).visit_leaf(leaf) + return super().visit_leaf(leaf) diff --git a/parso/parser.py b/parso/parser.py index 66f5443..3b25f35 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -23,6 +23,8 @@ within the statement. This lowers memory usage and cpu time and reduces the complexity of the ``Parser`` (there's another parser sitting inside ``Statement``, which produces ``Array`` and ``Call``). 
""" +from typing import Dict + from parso import tree from parso.pgen2.generator import ReservedString @@ -71,7 +73,7 @@ class Stack(list): return list(iterate()) -class StackNode(object): +class StackNode: def __init__(self, dfa): self.dfa = dfa self.nodes = [] @@ -86,7 +88,7 @@ class StackNode(object): def _token_to_transition(grammar, type_, value): # Map from token to label - if type_.contains_syntax: + if type_.value.contains_syntax: # Check for reserved words (keywords) try: return grammar.reserved_syntax_strings[value] @@ -96,7 +98,7 @@ def _token_to_transition(grammar, type_, value): return type_ -class BaseParser(object): +class BaseParser: """Parser engine. A Parser instance contains state pertaining to the current token @@ -108,11 +110,10 @@ class BaseParser(object): When a syntax error occurs, error_recovery() is called. """ - node_map = {} + node_map: Dict[str, type] = {} default_node = tree.Node - leaf_map = { - } + leaf_map: Dict[str, type] = {} default_leaf = tree.Leaf def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False): diff --git a/parso/pgen2/__init__.pyi b/parso/pgen2/__init__.pyi deleted file mode 100644 index 46c149f..0000000 --- a/parso/pgen2/__init__.pyi +++ /dev/null @@ -1 +0,0 @@ -from parso.pgen2.generator import generate_grammar as generate_grammar diff --git a/parso/pgen2/generator.py b/parso/pgen2/generator.py index 9bf54ae..db6e1cb 100644 --- a/parso/pgen2/generator.py +++ b/parso/pgen2/generator.py @@ -27,11 +27,14 @@ because we made some optimizations. """ from ast import literal_eval +from typing import TypeVar, Generic, Mapping, Sequence, Set, Union from parso.pgen2.grammar_parser import GrammarParser, NFAState +_TokenTypeT = TypeVar("_TokenTypeT") -class Grammar(object): + +class Grammar(Generic[_TokenTypeT]): """ Once initialized, this class supplies the grammar tables for the parsing engine implemented by parse.py. The parsing engine @@ -41,18 +44,21 @@ class Grammar(object): dfas. """ - def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings): - self.nonterminal_to_dfas = rule_to_dfas # Dict[str, List[DFAState]] + def __init__(self, + start_nonterminal: str, + rule_to_dfas: Mapping[str, Sequence['DFAState[_TokenTypeT]']], + reserved_syntax_strings: Mapping[str, 'ReservedString']): + self.nonterminal_to_dfas = rule_to_dfas self.reserved_syntax_strings = reserved_syntax_strings self.start_nonterminal = start_nonterminal -class DFAPlan(object): +class DFAPlan: """ Plans are used for the parser to create stack nodes and do the proper DFA state transitions. """ - def __init__(self, next_dfa, dfa_pushes=[]): + def __init__(self, next_dfa: 'DFAState', dfa_pushes: Sequence['DFAState'] = []): self.next_dfa = next_dfa self.dfa_pushes = dfa_pushes @@ -60,7 +66,7 @@ class DFAPlan(object): return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) -class DFAState(object): +class DFAState(Generic[_TokenTypeT]): """ The DFAState object is the core class for pretty much anything. DFAState are the vertices of an ordered graph while arcs and transitions are the @@ -70,20 +76,21 @@ class DFAState(object): transitions are then calculated to connect the DFA state machines that have different nonterminals. 
""" - def __init__(self, from_rule, nfa_set, final): + def __init__(self, from_rule: str, nfa_set: Set[NFAState], final: NFAState): assert isinstance(nfa_set, set) assert isinstance(next(iter(nfa_set)), NFAState) assert isinstance(final, NFAState) self.from_rule = from_rule self.nfa_set = nfa_set - self.arcs = {} # map from terminals/nonterminals to DFAState + # map from terminals/nonterminals to DFAState + self.arcs: Mapping[str, DFAState] = {} # In an intermediary step we set these nonterminal arcs (which has the # same structure as arcs). These don't contain terminals anymore. - self.nonterminal_arcs = {} + self.nonterminal_arcs: Mapping[str, DFAState] = {} # Transitions are basically the only thing that the parser is using # with is_final. Everyting else is purely here to create a parser. - self.transitions = {} #: Dict[Union[TokenType, ReservedString], DFAPlan] + self.transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] = {} self.is_final = final in nfa_set def add_arc(self, next_, label): @@ -111,22 +118,20 @@ class DFAState(object): return False return True - __hash__ = None # For Py3 compatibility. - def __repr__(self): return '<%s: %s is_final=%s>' % ( self.__class__.__name__, self.from_rule, self.is_final ) -class ReservedString(object): +class ReservedString: """ Most grammars will have certain keywords and operators that are mentioned in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER). This class basically is the former. """ - def __init__(self, value): + def __init__(self, value: str): self.value = value def __repr__(self): @@ -149,7 +154,6 @@ def _simplify_dfas(dfas): for j in range(i + 1, len(dfas)): state_j = dfas[j] if state_i == state_j: - #print " unify", i, j del dfas[j] for state in dfas: state.unifystate(state_j, state_i) @@ -233,7 +237,7 @@ def _dump_dfas(dfas): print(" %s -> %d" % (nonterminal, dfas.index(next_))) -def generate_grammar(bnf_grammar, token_namespace): +def generate_grammar(bnf_grammar: str, token_namespace) -> Grammar: """ ``bnf_text`` is a grammar in extended BNF (using * for repetition, + for at-least-once repetition, [] for optional parts, | for alternatives and () @@ -245,19 +249,19 @@ def generate_grammar(bnf_grammar, token_namespace): rule_to_dfas = {} start_nonterminal = None for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse(): - #_dump_nfa(nfa_a, nfa_z) + # _dump_nfa(nfa_a, nfa_z) dfas = _make_dfas(nfa_a, nfa_z) - #_dump_dfas(dfas) + # _dump_dfas(dfas) # oldlen = len(dfas) _simplify_dfas(dfas) # newlen = len(dfas) rule_to_dfas[nfa_a.from_rule] = dfas - #print(nfa_a.from_rule, oldlen, newlen) + # print(nfa_a.from_rule, oldlen, newlen) if start_nonterminal is None: start_nonterminal = nfa_a.from_rule - reserved_strings = {} + reserved_strings: Mapping[str, ReservedString] = {} for nonterminal, dfas in rule_to_dfas.items(): for dfa_state in dfas: for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): @@ -272,7 +276,7 @@ def generate_grammar(bnf_grammar, token_namespace): dfa_state.transitions[transition] = DFAPlan(next_dfa) _calculate_tree_traversal(rule_to_dfas) - return Grammar(start_nonterminal, rule_to_dfas, reserved_strings) + return Grammar(start_nonterminal, rule_to_dfas, reserved_strings) # type: ignore def _make_transition(token_namespace, reserved_syntax_strings, label): diff --git a/parso/pgen2/generator.pyi b/parso/pgen2/generator.pyi deleted file mode 100644 index 0d67a18..0000000 --- a/parso/pgen2/generator.pyi +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Any, Generic, Mapping, 
Sequence, Set, TypeVar, Union - -from parso.pgen2.grammar_parser import NFAState - -_TokenTypeT = TypeVar("_TokenTypeT") - -class Grammar(Generic[_TokenTypeT]): - nonterminal_to_dfas: Mapping[str, Sequence[DFAState[_TokenTypeT]]] - reserved_syntax_strings: Mapping[str, ReservedString] - start_nonterminal: str - def __init__( - self, - start_nonterminal: str, - rule_to_dfas: Mapping[str, Sequence[DFAState]], - reserved_syntax_strings: Mapping[str, ReservedString], - ) -> None: ... - -class DFAPlan: - next_dfa: DFAState - dfa_pushes: Sequence[DFAState] - -class DFAState(Generic[_TokenTypeT]): - from_rule: str - nfa_set: Set[NFAState] - is_final: bool - arcs: Mapping[str, DFAState] # map from all terminals/nonterminals to DFAState - nonterminal_arcs: Mapping[str, DFAState] - transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan] - def __init__( - self, from_rule: str, nfa_set: Set[NFAState], final: NFAState - ) -> None: ... - -class ReservedString: - value: str - def __init__(self, value: str) -> None: ... - def __repr__(self) -> str: ... - -def generate_grammar(bnf_grammar: str, token_namespace: Any) -> Grammar[Any]: ... diff --git a/parso/pgen2/grammar_parser.py b/parso/pgen2/grammar_parser.py index 0be7209..582efb4 100644 --- a/parso/pgen2/grammar_parser.py +++ b/parso/pgen2/grammar_parser.py @@ -4,25 +4,49 @@ # Modifications: # Copyright David Halter and Contributors # Modifications are dual-licensed: MIT and PSF. +from typing import Optional, Iterator, Tuple, List from parso.python.tokenize import tokenize from parso.utils import parse_version_string from parso.python.token import PythonTokenTypes -class GrammarParser(): +class NFAArc: + def __init__(self, next_: 'NFAState', nonterminal_or_string: Optional[str]): + self.next: NFAState = next_ + self.nonterminal_or_string: Optional[str] = nonterminal_or_string + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, self.nonterminal_or_string) + + +class NFAState: + def __init__(self, from_rule: str): + self.from_rule: str = from_rule + self.arcs: List[NFAArc] = [] + + def add_arc(self, next_, nonterminal_or_string=None): + assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str) + assert isinstance(next_, NFAState) + self.arcs.append(NFAArc(next_, nonterminal_or_string)) + + def __repr__(self): + return '<%s: from %s>' % (self.__class__.__name__, self.from_rule) + + +class GrammarParser: """ The parser for Python grammar files. 
""" - def __init__(self, bnf_grammar): + def __init__(self, bnf_grammar: str): self._bnf_grammar = bnf_grammar self.generator = tokenize( bnf_grammar, - version_info=parse_version_string('3.6') + version_info=parse_version_string('3.9') ) self._gettoken() # Initialize lookahead - def parse(self): + def parse(self) -> Iterator[Tuple[NFAState, NFAState]]: # grammar: (NEWLINE | rule)* ENDMARKER while self.type != PythonTokenTypes.ENDMARKER: while self.type == PythonTokenTypes.NEWLINE: @@ -134,26 +158,3 @@ class GrammarParser(): line = self._bnf_grammar.splitlines()[self.begin[0] - 1] raise SyntaxError(msg, ('', self.begin[0], self.begin[1], line)) - - -class NFAArc(object): - def __init__(self, next_, nonterminal_or_string): - self.next = next_ - self.nonterminal_or_string = nonterminal_or_string - - def __repr__(self): - return '<%s: %s>' % (self.__class__.__name__, self.nonterminal_or_string) - - -class NFAState(object): - def __init__(self, from_rule): - self.from_rule = from_rule - self.arcs = [] # List[nonterminal (str), NFAState] - - def add_arc(self, next_, nonterminal_or_string=None): - assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str) - assert isinstance(next_, NFAState) - self.arcs.append(NFAArc(next_, nonterminal_or_string)) - - def __repr__(self): - return '<%s: from %s>' % (self.__class__.__name__, self.from_rule) diff --git a/parso/pgen2/grammar_parser.pyi b/parso/pgen2/grammar_parser.pyi deleted file mode 100644 index b73a5a6..0000000 --- a/parso/pgen2/grammar_parser.pyi +++ /dev/null @@ -1,20 +0,0 @@ -from typing import Generator, List, Optional, Tuple - -from parso.python.token import TokenType - -class GrammarParser: - generator: Generator[TokenType, None, None] - def __init__(self, bnf_grammar: str) -> None: ... - def parse(self) -> Generator[Tuple[NFAState, NFAState], None, None]: ... - -class NFAArc: - next: NFAState - nonterminal_or_string: Optional[str] - def __init__( - self, next_: NFAState, nonterminal_or_string: Optional[str] - ) -> None: ... - -class NFAState: - from_rule: str - arcs: List[NFAArc] - def __init__(self, from_rule: str) -> None: ... diff --git a/parso/python/diff.py b/parso/python/diff.py index 1863413..ba999fa 100644 --- a/parso/python/diff.py +++ b/parso/python/diff.py @@ -247,7 +247,7 @@ def _update_positions(nodes, line_offset, last_leaf): _update_positions(children, line_offset, last_leaf) -class DiffParser(object): +class DiffParser: """ An advanced form of parsing a file faster. Unfortunately comes with huge side effects. It changes the given module. 
@@ -514,7 +514,7 @@ class DiffParser(object): yield token -class _NodesTreeNode(object): +class _NodesTreeNode: _ChildrenGroup = namedtuple( '_ChildrenGroup', 'prefix children line_offset last_line_offset_leaf') @@ -589,7 +589,7 @@ class _NodesTreeNode(object): return '<%s: %s>' % (self.__class__.__name__, self.tree_node) -class _NodesTree(object): +class _NodesTree: def __init__(self, module): self._base_node = _NodesTreeNode(module) self._working_stack = [self._base_node] diff --git a/parso/python/errors.py b/parso/python/errors.py index 54a6641..d8343c7 100644 --- a/parso/python/errors.py +++ b/parso/python/errors.py @@ -15,10 +15,11 @@ _MAX_BLOCK_SIZE = 20 _MAX_INDENT_COUNT = 100 ALLOWED_FUTURES = ( 'nested_scopes', 'generators', 'division', 'absolute_import', - 'with_statement', 'print_function', 'unicode_literals', + 'with_statement', 'print_function', 'unicode_literals', 'generator_stop', ) _COMP_FOR_TYPES = ('comp_for', 'sync_comp_for') + def _get_rhs_name(node, version): type_ = node.type if type_ == "lambdef": @@ -39,7 +40,7 @@ def _get_rhs_name(node, version): elif ( first == "(" and (second == ")" - or (len(node.children) == 3 and node.children[1].type == "testlist_comp")) + or (len(node.children) == 3 and node.children[1].type == "testlist_comp")) ): return "tuple" elif first == "(": @@ -79,8 +80,7 @@ def _get_rhs_name(node, version): elif trailer.children[0] == ".": return "attribute" elif ( - ("expr" in type_ - and "star_expr" not in type_) # is a substring + ("expr" in type_ and "star_expr" not in type_) # is a substring or "_test" in type_ or type_ in ("term", "factor") ): @@ -91,7 +91,8 @@ def _get_rhs_name(node, version): return "tuple" elif type_ == "fstring": return "f-string expression" - return type_ # shouldn't reach here + return type_ # shouldn't reach here + def _iter_stmts(scope): """ @@ -173,13 +174,11 @@ def _iter_definition_exprs_from_lists(exprlist): if child.children[0] == '(': testlist_comp = child.children[1] if testlist_comp.type == 'testlist_comp': - for expr in _iter_definition_exprs_from_lists(testlist_comp): - yield expr + yield from _iter_definition_exprs_from_lists(testlist_comp) return else: # It's a paren that doesn't do anything, like 1 + (1) - for c in check_expr(testlist_comp): - yield c + yield from check_expr(testlist_comp) return elif child.children[0] == '[': yield testlist_comp @@ -188,11 +187,9 @@ def _iter_definition_exprs_from_lists(exprlist): if exprlist.type in _STAR_EXPR_PARENTS: for child in exprlist.children[::2]: - for c in check_expr(child): # Python 2 sucks - yield c + yield from check_expr(child) else: - for c in check_expr(exprlist): # Python 2 sucks - yield c + yield from check_expr(exprlist) def _get_expr_stmt_definition_exprs(expr_stmt): @@ -225,7 +222,7 @@ def _any_fstring_error(version, node): return search_ancestor(node, "fstring") -class _Context(object): +class _Context: def __init__(self, node, add_syntax_error, parent_context=None): self.node = node self.blocks = [] @@ -353,7 +350,7 @@ class ErrorFinder(Normalizer): Searches for errors in the syntax tree. """ def __init__(self, *args, **kwargs): - super(ErrorFinder, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self._error_dict = {} self.version = self.grammar.version_info @@ -377,7 +374,7 @@ class ErrorFinder(Normalizer): # might find errors in there that should be ignored, because # the error node itself already shows that there's an issue. 
return '' - return super(ErrorFinder, self).visit(node) + return super().visit(node) @contextmanager def visit_node(self, node): @@ -424,7 +421,9 @@ class ErrorFinder(Normalizer): message = 'invalid syntax' if ( self.version >= (3, 9) - and leaf.value in _get_token_collection(self.version).always_break_tokens + and leaf.value in _get_token_collection( + self.version + ).always_break_tokens ): message = "f-string: " + message else: @@ -440,7 +439,7 @@ class ErrorFinder(Normalizer): self.context = self.context.add_context(parent) # The rest is rule based. - return super(ErrorFinder, self).visit_leaf(leaf) + return super().visit_leaf(leaf) def _add_indentation_error(self, spacing, message): self.add_issue(spacing, 903, "IndentationError: " + message) @@ -466,7 +465,7 @@ class IndentationRule(Rule): code = 903 def _get_message(self, message, node): - message = super(IndentationRule, self)._get_message(message, node) + message = super()._get_message(message, node) return "IndentationError: " + message @@ -491,7 +490,7 @@ class SyntaxRule(Rule): code = 901 def _get_message(self, message, node): - message = super(SyntaxRule, self)._get_message(message, node) + message = super()._get_message(message, node) if ( "f-string" not in message and _any_fstring_error(self._normalizer.version, node) @@ -589,9 +588,6 @@ class _NameChecks(SyntaxRule): if leaf.value == '__debug__' and leaf.is_definition(): return True - if leaf.value == 'None' and self._normalizer.version < (3, 0) \ - and leaf.is_definition(): - self.add_issue(leaf, message=self.message_none) @ErrorFinder.register_rule(type='string') @@ -601,7 +597,6 @@ class _StringChecks(SyntaxRule): def is_issue(self, leaf): string_prefix = leaf.string_prefix.lower() if 'b' in string_prefix \ - and self._normalizer.version >= (3, 0) \ and any(c for c in leaf.value if ord(c) > 127): # b'ä' return True @@ -609,14 +604,9 @@ class _StringChecks(SyntaxRule): if 'r' not in string_prefix: # Raw strings don't need to be checked if they have proper # escaping. - is_bytes = self._normalizer.version < (3, 0) - if 'b' in string_prefix: - is_bytes = True - if 'u' in string_prefix: - is_bytes = False payload = leaf._get_payload() - if is_bytes: + if 'b' in string_prefix: payload = payload.encode('utf-8') func = codecs.escape_decode else: @@ -675,10 +665,6 @@ class _ReturnAndYieldChecks(SyntaxRule): and any(self._normalizer.context.node.iter_yield_exprs()): if leaf.value == 'return' and leaf.parent.type == 'return_stmt': return True - elif leaf.value == 'yield' \ - and leaf.get_next_leaf() != 'from' \ - and self._normalizer.version == (3, 5): - self.add_issue(self.get_node(leaf), message=self.message_async_yield) @ErrorFinder.register_rule(type='strings') @@ -693,12 +679,10 @@ class _BytesAndStringMix(SyntaxRule): def is_issue(self, node): first = node.children[0] - # In Python 2 it's allowed to mix bytes and unicode. 
- if self._normalizer.version >= (3, 0): - first_is_bytes = self._is_bytes_literal(first) - for string in node.children[1:]: - if first_is_bytes != self._is_bytes_literal(string): - return True + first_is_bytes = self._is_bytes_literal(first) + for string in node.children[1:]: + if first_is_bytes != self._is_bytes_literal(string): + return True @ErrorFinder.register_rule(type='import_as_names') @@ -731,8 +715,6 @@ class _FutureImportRule(SyntaxRule): for from_name, future_name in node.get_paths(): name = future_name.value allowed_futures = list(ALLOWED_FUTURES) - if self._normalizer.version >= (3, 5): - allowed_futures.append('generator_stop') if self._normalizer.version >= (3, 7): allowed_futures.append('annotations') if name == 'braces': @@ -755,19 +737,6 @@ class _StarExprRule(SyntaxRule): # [*[] for a in [1]] if node.parent.children[1].type in _COMP_FOR_TYPES: self.add_issue(node, message=self.message_iterable_unpacking) - if self._normalizer.version <= (3, 4): - n = search_ancestor(node, 'for_stmt', 'expr_stmt') - found_definition = False - if n is not None: - if n.type == 'expr_stmt': - exprs = _get_expr_stmt_definition_exprs(n) - else: - exprs = _get_for_stmt_definition_exprs(n) - if node in exprs: - found_definition = True - - if not found_definition: - self.add_issue(node, message=self.message_assignment) @ErrorFinder.register_rule(types=_STAR_EXPR_PARENTS) @@ -892,22 +861,10 @@ class _ArglistRule(SyntaxRule): arg_set = set() kw_only = False kw_unpacking_only = False - is_old_starred = False - # In python 3 this would be a bit easier (stars are part of - # argument), but we have to understand both. for argument in node.children: if argument == ',': continue - if argument in ('*', '**'): - # Python < 3.5 has the order engraved in the grammar - # file. No need to do anything here. - is_old_starred = True - continue - if is_old_starred: - is_old_starred = False - continue - if argument.type == 'argument': first = argument.children[0] if _is_argument_comprehension(argument) and len(node.children) >= 2: @@ -1149,6 +1106,7 @@ class _CompForRule(_CheckAssignmentRule): class _ExprStmtRule(_CheckAssignmentRule): message = "illegal expression for augmented assignment" extended_message = "'{target}' is an " + message + def is_issue(self, node): augassign = node.children[1] is_aug_assign = augassign != '=' and augassign.type != 'annassign' @@ -1178,6 +1136,7 @@ class _ExprStmtRule(_CheckAssignmentRule): ), ) + @ErrorFinder.register_rule(type='with_item') class _WithItemRule(_CheckAssignmentRule): def is_issue(self, with_item): diff --git a/parso/python/grammar27.txt b/parso/python/grammar27.txt deleted file mode 100644 index 29f1b82..0000000 --- a/parso/python/grammar27.txt +++ /dev/null @@ -1,143 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed in PEP 306, -# "How to Change Python's Grammar" - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() and input() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! 
-single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: stmt* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ':' suite -parameters: '(' [varargslist] ')' -varargslist: ((fpdef ['=' test] ',')* - ('*' NAME [',' '**' NAME] | '**' NAME) | - fpdef ['=' test] (',' fpdef ['=' test])* [',']) -fpdef: NAME | '(' fplist ')' -fplist: fpdef (',' fpdef)* [','] - -stmt: simple_stmt | compound_stmt | NEWLINE -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | exec_stmt | assert_stmt) -expr_stmt: testlist (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist))*) -augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -print_stmt: 'print' ( [ test (',' test)* [','] ] | - '>>' test [ (',' test)+ [','] ] ) -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test [',' test [',' test]]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -import_from: ('from' ('.'* dotted_name | '.'+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' 
NAME)* -global_stmt: 'global' NAME (',' NAME)* -exec_stmt: 'exec' expr ['in' test [',' test]] -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test [('as' | ',') test]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -# Backward compatibility cruft to support: -# [ x for x in lambda: True, lambda: False if x() ] -# even while also allowing: -# lambda x: 5 if x else 2 -# (But not a mix of the two) -testlist_safe: old_test [(',' old_test)+ [',']] -old_test: or_test | old_lambdef -old_lambdef: 'lambda' [varargslist] ':' old_test - -test: or_test ['if' or_test 'else' test] | lambdef -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom trailer* ['**' factor] -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [listmaker] ']' | - '{' [dictorsetmaker] '}' | - '`' testlist1 '`' | - NAME | NUMBER | strings) -strings: STRING+ -listmaker: test ( list_for | (',' test)* [','] ) -testlist_comp: test ( sync_comp_for | (',' test)* [','] ) -lambdef: 'lambda' [varargslist] ':' test -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: expr (',' expr)* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) | - (test (sync_comp_for | (',' test)* [','])) ) - -classdef: 'class' NAME ['(' [testlist] ')'] ':' suite - -arglist: (argument ',')* (argument [','] - |'*' test (',' argument)* [',' '**' test] - |'**' test) -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. 
-argument: test [sync_comp_for] | test '=' test - -list_iter: list_for | list_if -list_for: 'for' exprlist 'in' testlist_safe [list_iter] -list_if: 'if' old_test [list_iter] - -comp_iter: sync_comp_for | comp_if -sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' old_test [comp_iter] - -testlist1: test (',' test)* - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [testlist] diff --git a/parso/python/grammar33.txt b/parso/python/grammar33.txt deleted file mode 100644 index dd93d8b..0000000 --- a/parso/python/grammar33.txt +++ /dev/null @@ -1,134 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed in PEP 306, -# "How to Change Python's Grammar" - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: stmt* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ['->' test] ':' suite -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' - ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' - ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt | NEWLINE -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' 
| '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -test: or_test ['if' or_test 'else' test] | lambdef -test_nocond: or_test | lambdef_nocond -lambdef: 'lambda' [varargslist] ':' test -lambdef_nocond: 'lambda' [varargslist] ':' test_nocond -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -# <> isn't actually a valid comparison operator in Python. It's here for the -# sake of a __future__ import described in PEP 401 -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom trailer* ['**' factor] -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [testlist_comp] ']' | - '{' [dictorsetmaker] '}' | - NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') -strings: STRING+ -testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] ) -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) | - (test (sync_comp_for | (',' test)* [','])) ) - -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite - -arglist: (argument ',')* (argument [','] - |'*' test (',' argument)* [',' '**' test] - |'**' test) -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. 
-argument: test [sync_comp_for] | test '=' test # Really [keyword '='] test -comp_iter: sync_comp_for | comp_if -sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' test_nocond [comp_iter] - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [yield_arg] -yield_arg: 'from' test | testlist diff --git a/parso/python/grammar34.txt b/parso/python/grammar34.txt deleted file mode 100644 index 999f4cd..0000000 --- a/parso/python/grammar34.txt +++ /dev/null @@ -1,134 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed at -# https://docs.python.org/devguide/grammar.html - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: stmt* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef) -funcdef: 'def' NAME parameters ['->' test] ':' suite -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' - ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' - ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt | NEWLINE -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' 
NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -test: or_test ['if' or_test 'else' test] | lambdef -test_nocond: or_test | lambdef_nocond -lambdef: 'lambda' [varargslist] ':' test -lambdef_nocond: 'lambda' [varargslist] ':' test_nocond -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -# <> isn't actually a valid comparison operator in Python. It's here for the -# sake of a __future__ import described in PEP 401 -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom trailer* ['**' factor] -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [testlist_comp] ']' | - '{' [dictorsetmaker] '}' | - NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') -strings: STRING+ -testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] ) -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) | - (test (sync_comp_for | (',' test)* [','])) ) - -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite - -arglist: (argument ',')* (argument [','] - |'*' test (',' argument)* [',' '**' test] - |'**' test) -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. -argument: test [sync_comp_for] | test '=' test # Really [keyword '='] test -comp_iter: sync_comp_for | comp_if -sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' test_nocond [comp_iter] - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [yield_arg] -yield_arg: 'from' test | testlist diff --git a/parso/python/grammar35.txt b/parso/python/grammar35.txt deleted file mode 100644 index 29f49e4..0000000 --- a/parso/python/grammar35.txt +++ /dev/null @@ -1,153 +0,0 @@ -# Grammar for Python - -# Note: Changing the grammar specified in this file will most likely -# require corresponding changes in the parser module -# (../Modules/parsermodule.c). If you can't make the changes to -# that module yourself, please co-ordinate the required changes -# with someone who can; ask around on python-dev for help. 
Fred -# Drake will probably be listening there. - -# NOTE WELL: You should also follow all the steps listed at -# https://docs.python.org/devguide/grammar.html - -# Start symbols for the grammar: -# single_input is a single interactive statement; -# file_input is a module or sequence of commands read from an input file; -# eval_input is the input for the eval() functions. -# NB: compound_stmt in single_input is followed by extra NEWLINE! -single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE -file_input: stmt* ENDMARKER -eval_input: testlist NEWLINE* ENDMARKER - -decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE -decorators: decorator+ -decorated: decorators (classdef | funcdef | async_funcdef) - -# NOTE: Reinoud Elhorst, using ASYNC/AWAIT keywords instead of tokens -# skipping python3.5 compatibility, in favour of 3.7 solution -async_funcdef: 'async' funcdef -funcdef: 'def' NAME parameters ['->' test] ':' suite - -parameters: '(' [typedargslist] ')' -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' - ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]] - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) -tfpdef: NAME [':' test] -varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' - ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] - | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef) -vfpdef: NAME - -stmt: simple_stmt | compound_stmt | NEWLINE -simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE -small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt) -expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*) -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] -augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//=') -# For normal assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist -pass_stmt: 'pass' -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt -break_stmt: 'break' -continue_stmt: 'continue' -return_stmt: 'return' [testlist] -yield_stmt: yield_expr -raise_stmt: 'raise' [test ['from' test]] -import_stmt: import_name | import_from -import_name: 'import' dotted_as_names -# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)) -import_as_name: NAME ['as' NAME] -dotted_as_name: dotted_name ['as' NAME] -import_as_names: import_as_name (',' import_as_name)* [','] -dotted_as_names: dotted_as_name (',' dotted_as_name)* -dotted_name: NAME ('.' 
NAME)* -global_stmt: 'global' NAME (',' NAME)* -nonlocal_stmt: 'nonlocal' NAME (',' NAME)* -assert_stmt: 'assert' test [',' test] - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt -async_stmt: 'async' (funcdef | with_stmt | for_stmt) -if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] -while_stmt: 'while' test ':' suite ['else' ':' suite] -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] -try_stmt: ('try' ':' suite - ((except_clause ':' suite)+ - ['else' ':' suite] - ['finally' ':' suite] | - 'finally' ':' suite)) -with_stmt: 'with' with_item (',' with_item)* ':' suite -with_item: test ['as' expr] -# NB compile.c makes sure that the default except clause is last -except_clause: 'except' [test ['as' NAME]] -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT - -test: or_test ['if' or_test 'else' test] | lambdef -test_nocond: or_test | lambdef_nocond -lambdef: 'lambda' [varargslist] ':' test -lambdef_nocond: 'lambda' [varargslist] ':' test_nocond -or_test: and_test ('or' and_test)* -and_test: not_test ('and' not_test)* -not_test: 'not' not_test | comparison -comparison: expr (comp_op expr)* -# <> isn't actually a valid comparison operator in Python. It's here for the -# sake of a __future__ import described in PEP 401 (which really works :-) -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' -star_expr: '*' expr -expr: xor_expr ('|' xor_expr)* -xor_expr: and_expr ('^' and_expr)* -and_expr: shift_expr ('&' shift_expr)* -shift_expr: arith_expr (('<<'|'>>') arith_expr)* -arith_expr: term (('+'|'-') term)* -term: factor (('*'|'@'|'/'|'%'|'//') factor)* -factor: ('+'|'-'|'~') factor | power -power: atom_expr ['**' factor] -atom_expr: ['await'] atom trailer* -atom: ('(' [yield_expr|testlist_comp] ')' | - '[' [testlist_comp] ']' | - '{' [dictorsetmaker] '}' | - NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False') -strings: STRING+ -testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] ) -trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME -subscriptlist: subscript (',' subscript)* [','] -subscript: test | [test] ':' [test] [sliceop] -sliceop: ':' [test] -exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] -testlist: test (',' test)* [','] -dictorsetmaker: ( ((test ':' test | '**' expr) - (sync_comp_for | (',' (test ':' test | '**' expr))* [','])) | - ((test | star_expr) - (sync_comp_for | (',' (test | star_expr))* [','])) ) - -classdef: 'class' NAME ['(' [arglist] ')'] ':' suite - -arglist: argument (',' argument)* [','] - -# The reason that keywords are test nodes instead of NAME is that using NAME -# results in an ambiguity. ast.c makes sure it's a NAME. -# "test '=' test" is really "keyword '=' test", but we have no such token. -# These need to be in a single rule to avoid grammar that is ambiguous -# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, -# we explicitly match '*' here, too, to give it proper precedence. -# Illegal combinations and orderings are blocked in ast.c: -# multiple (test comp_for) arguments are blocked; keyword unpackings -# that precede iterable unpackings are blocked; etc. 
-argument: ( test [sync_comp_for] | - test '=' test | - '**' test | - '*' test ) - -comp_iter: sync_comp_for | comp_if -sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] -comp_if: 'if' test_nocond [comp_iter] - -# not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: NAME - -yield_expr: 'yield' [yield_arg] -yield_arg: 'from' test | testlist diff --git a/parso/python/issue_list.txt b/parso/python/issue_list.txt index 88ca092..4cd9ebf 100644 --- a/parso/python/issue_list.txt +++ b/parso/python/issue_list.txt @@ -24,7 +24,6 @@ A list of syntax/indentation errors I've encountered in CPython. # Just ignore this one, newer versions will not be affected anymore and # it's a limit of 2^16 - 1. - "too many annotations" # Only python 3.0 - 3.5, 3.6 is not affected. # Python/ast.c # used with_item exprlist expr_stmt @@ -54,8 +53,8 @@ A list of syntax/indentation errors I've encountered in CPython. "iterable unpacking cannot be used in comprehension" # [*[] for a in [1]] "dict unpacking cannot be used in dict comprehension" # {**{} for a in [1]} "Generator expression must be parenthesized if not sole argument" # foo(x for x in [], b) - "positional argument follows keyword argument unpacking" # f(**x, y) >= 3.5 - "positional argument follows keyword argument" # f(x=2, y) >= 3.5 + "positional argument follows keyword argument unpacking" # f(**x, y) + "positional argument follows keyword argument" # f(x=2, y) "iterable argument unpacking follows keyword argument unpacking" # foo(**kwargs, *args) "lambda cannot contain assignment" # f(lambda: 1=1) "keyword can't be an expression" # f(+x=1) @@ -167,10 +166,3 @@ A list of syntax/indentation errors I've encountered in CPython. E_OVERFLOW: "expression too long" E_DECODE: "unknown decode error" E_BADSINGLE: "multiple statements found while compiling a single statement" - - -Version specific: -Python 3.5: - 'yield' inside async function -Python 3.4: - can use starred expression only as assignment target diff --git a/parso/python/parser.py b/parso/python/parser.py index ada60fb..5cc3ced 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -43,10 +43,8 @@ class Parser(BaseParser): # Not sure if this is the best idea, but IMO it's the easiest way to # avoid extreme amounts of work around the subtle difference of 2/3 # grammar in list comoprehensions. - 'list_for': tree.SyncCompFor, 'decorator': tree.Decorator, 'lambdef': tree.Lambda, - 'old_lambdef': tree.Lambda, 'lambdef_nocond': tree.Lambda, } default_node = tree.PythonNode @@ -63,8 +61,8 @@ class Parser(BaseParser): } def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'): - super(Parser, self).__init__(pgen_grammar, start_nonterminal, - error_recovery=error_recovery) + super().__init__(pgen_grammar, start_nonterminal, + error_recovery=error_recovery) self.syntax_errors = [] self._omit_dedent_list = [] @@ -77,7 +75,7 @@ class Parser(BaseParser): tokens = self._recovery_tokenize(tokens) - return super(Parser, self).parse(tokens) + return super().parse(tokens) def convert_node(self, nonterminal, children): """ @@ -96,12 +94,6 @@ class Parser(BaseParser): # ones and therefore have pseudo start/end positions and no # prefixes. Just ignore them. children = [children[0]] + children[2:-1] - elif nonterminal == 'list_if': - # Make transitioning from 2 to 3 easier. - nonterminal = 'comp_if' - elif nonterminal == 'listmaker': - # Same as list_if above. 
- nonterminal = 'testlist_comp' node = self.default_node(nonterminal, children) for c in children: c.parent = node @@ -146,7 +138,7 @@ class Parser(BaseParser): return if not self._error_recovery: - return super(Parser, self).error_recovery(token) + return super().error_recovery(token) def current_suite(stack): # For now just discard everything that is not a suite or diff --git a/parso/python/pep8.py b/parso/python/pep8.py index 2a037f9..e821a45 100644 --- a/parso/python/pep8.py +++ b/parso/python/pep8.py @@ -1,5 +1,6 @@ import re from contextlib import contextmanager +from typing import Tuple from parso.python.errors import ErrorFinder, ErrorFinderConfig from parso.normalizer import Rule @@ -15,16 +16,17 @@ _CLOSING_BRACKETS = ')', ']', '}' _FACTOR = '+', '-', '~' _ALLOW_SPACE = '*', '+', '-', '**', '/', '//', '@' _BITWISE_OPERATOR = '<<', '>>', '|', '&', '^' -_NEEDS_SPACE = ('=', '%', '->', - '<', '>', '==', '>=', '<=', '<>', '!=', - '+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=', '<<=', - '>>=', '**=', '//=') +_NEEDS_SPACE: Tuple[str, ...] = ( + '=', '%', '->', + '<', '>', '==', '>=', '<=', '<>', '!=', + '+=', '-=', '*=', '@=', '/=', '%=', '&=', '|=', '^=', '<<=', + '>>=', '**=', '//=') _NEEDS_SPACE += _BITWISE_OPERATOR _IMPLICIT_INDENTATION_TYPES = ('dictorsetmaker', 'argument') _POSSIBLE_SLICE_PARENTS = ('subscript', 'subscriptlist', 'sliceop') -class IndentationTypes(object): +class IndentationTypes: VERTICAL_BRACKET = object() HANGING_BRACKET = object() BACKSLASH = object() @@ -71,7 +73,6 @@ class BracketNode(IndentationNode): n = n.parent parent_indentation = n.indentation - next_leaf = leaf.get_next_leaf() if '\n' in next_leaf.prefix: # This implies code like: @@ -93,7 +94,7 @@ class BracketNode(IndentationNode): if '\t' in config.indentation: self.indentation = None else: - self.indentation = ' ' * expected_end_indent + self.indentation = ' ' * expected_end_indent self.bracket_indentation = self.indentation self.type = IndentationTypes.VERTICAL_BRACKET @@ -111,7 +112,7 @@ class ImplicitNode(BracketNode): annotations and dict values. """ def __init__(self, config, leaf, parent): - super(ImplicitNode, self).__init__(config, leaf, parent) + super().__init__(config, leaf, parent) self.type = IndentationTypes.IMPLICIT next_leaf = leaf.get_next_leaf() @@ -137,7 +138,7 @@ class BackslashNode(IndentationNode): self.indentation = parent_indentation + config.indentation else: # +1 because there is a space. - self.indentation = ' ' * (equals.end_pos[1] + 1) + self.indentation = ' ' * (equals.end_pos[1] + 1) else: self.indentation = parent_indentation + config.indentation self.bracket_indentation = self.indentation @@ -150,7 +151,7 @@ def _is_magic_name(name): class PEP8Normalizer(ErrorFinder): def __init__(self, *args, **kwargs): - super(PEP8Normalizer, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self._previous_part = None self._previous_leaf = None self._on_newline = True @@ -173,7 +174,7 @@ class PEP8Normalizer(ErrorFinder): @contextmanager def visit_node(self, node): - with super(PEP8Normalizer, self).visit_node(node): + with super().visit_node(node): with self._visit_node(node): yield @@ -190,7 +191,8 @@ class PEP8Normalizer(ErrorFinder): expr_stmt = node.parent # Check if it's simply defining a single name, not something like # foo.bar or x[1], where using a lambda could make more sense. 
- if expr_stmt.type == 'expr_stmt' and any(n.type == 'name' for n in expr_stmt.children[:-2:2]): + if expr_stmt.type == 'expr_stmt' and any(n.type == 'name' + for n in expr_stmt.children[:-2:2]): self.add_issue(node, 731, 'Do not assign a lambda expression, use a def') elif typ == 'try_stmt': for child in node.children: @@ -221,7 +223,6 @@ class PEP8Normalizer(ErrorFinder): if typ in _IMPORT_TYPES: simple_stmt = node.parent module = simple_stmt.parent - #if module.type == 'simple_stmt': if module.type == 'file_input': index = module.children.index(simple_stmt) for child in module.children[:index]: @@ -341,7 +342,7 @@ class PEP8Normalizer(ErrorFinder): self._newline_count = 0 def visit_leaf(self, leaf): - super(PEP8Normalizer, self).visit_leaf(leaf) + super().visit_leaf(leaf) for part in leaf._split_prefix(): if part.type == 'spacing': # This part is used for the part call after for. @@ -406,7 +407,6 @@ class PEP8Normalizer(ErrorFinder): and leaf.parent.parent.type == 'decorated': self.add_issue(part, 304, "Blank lines found after function decorator") - self._newline_count += 1 if type_ == 'backslash': @@ -461,33 +461,62 @@ class PEP8Normalizer(ErrorFinder): else: should_be_indentation = node.indentation if self._in_suite_introducer and indentation == \ - node.get_latest_suite_node().indentation \ - + self._config.indentation: - self.add_issue(part, 129, "Line with same indent as next logical block") + node.get_latest_suite_node().indentation \ + + self._config.indentation: + self.add_issue(part, 129, "Line with same indent as next logical block") elif indentation != should_be_indentation: if not self._check_tabs_spaces(spacing) and part.value != '\n': if value in '])}': if node.type == IndentationTypes.VERTICAL_BRACKET: - self.add_issue(part, 124, "Closing bracket does not match visual indentation") + self.add_issue( + part, + 124, + "Closing bracket does not match visual indentation" + ) else: - self.add_issue(part, 123, "Losing bracket does not match indentation of opening bracket's line") + self.add_issue( + part, + 123, + "Losing bracket does not match " + "indentation of opening bracket's line" + ) else: if len(indentation) < len(should_be_indentation): if node.type == IndentationTypes.VERTICAL_BRACKET: - self.add_issue(part, 128, 'Continuation line under-indented for visual indent') + self.add_issue( + part, + 128, + 'Continuation line under-indented for visual indent' + ) elif node.type == IndentationTypes.BACKSLASH: - self.add_issue(part, 122, 'Continuation line missing indentation or outdented') + self.add_issue( + part, + 122, + 'Continuation line missing indentation or outdented' + ) elif node.type == IndentationTypes.IMPLICIT: self.add_issue(part, 135, 'xxx') else: - self.add_issue(part, 121, 'Continuation line under-indented for hanging indent') + self.add_issue( + part, + 121, + 'Continuation line under-indented for hanging indent' + ) else: if node.type == IndentationTypes.VERTICAL_BRACKET: - self.add_issue(part, 127, 'Continuation line over-indented for visual indent') + self.add_issue( + part, + 127, + 'Continuation line over-indented for visual indent' + ) elif node.type == IndentationTypes.IMPLICIT: self.add_issue(part, 136, 'xxx') else: - self.add_issue(part, 126, 'Continuation line over-indented for hanging indent') + self.add_issue( + part, + 126, + 'Continuation line over-indented for hanging indent' + ) else: self._check_spacing(part, spacing) @@ -524,7 +553,7 @@ class PEP8Normalizer(ErrorFinder): else: last_column = part.end_pos[1] if last_column > 
self._config.max_characters \ - and spacing.start_pos[1] <= self._config.max_characters : + and spacing.start_pos[1] <= self._config.max_characters: # Special case for long URLs in multi-line docstrings or comments, # but still report the error when the 72 first chars are whitespaces. report = True @@ -538,7 +567,7 @@ class PEP8Normalizer(ErrorFinder): part, 501, 'Line too long (%s > %s characters)' % - (last_column, self._config.max_characters), + (last_column, self._config.max_characters), ) def _check_spacing(self, part, spacing): @@ -573,11 +602,11 @@ class PEP8Normalizer(ErrorFinder): message = "Whitespace before '%s'" % part.value add_if_spaces(spacing, 202, message) elif part in (',', ';') or part == ':' \ - and part.parent.type not in _POSSIBLE_SLICE_PARENTS: + and part.parent.type not in _POSSIBLE_SLICE_PARENTS: message = "Whitespace before '%s'" % part.value add_if_spaces(spacing, 203, message) elif prev == ':' and prev.parent.type in _POSSIBLE_SLICE_PARENTS: - pass # TODO + pass # TODO elif prev in (',', ';', ':'): add_not_spaces(spacing, 231, "missing whitespace after '%s'") elif part == ':': # Is a subscript @@ -602,9 +631,17 @@ class PEP8Normalizer(ErrorFinder): if param.type == 'param' and param.annotation: add_not_spaces(spacing, 252, 'Expected spaces around annotation equals') else: - add_if_spaces(spacing, 251, 'Unexpected spaces around keyword / parameter equals') + add_if_spaces( + spacing, + 251, + 'Unexpected spaces around keyword / parameter equals' + ) elif part in _BITWISE_OPERATOR or prev in _BITWISE_OPERATOR: - add_not_spaces(spacing, 227, 'Missing whitespace around bitwise or shift operator') + add_not_spaces( + spacing, + 227, + 'Missing whitespace around bitwise or shift operator' + ) elif part == '%' or prev == '%': add_not_spaces(spacing, 228, 'Missing whitespace around modulo operator') else: @@ -621,8 +658,7 @@ class PEP8Normalizer(ErrorFinder): if spaces and part not in _ALLOW_SPACE and prev not in _ALLOW_SPACE: message_225 = 'Missing whitespace between tokens' - #print('xy', spacing) - #self.add_issue(spacing, 225, message_225) + # self.add_issue(spacing, 225, message_225) # TODO why only brackets? if part in _OPENING_BRACKETS: message = "Whitespace before '%s'" % part.value @@ -664,7 +700,8 @@ class PEP8Normalizer(ErrorFinder): self.add_issue(leaf, 711, message) break elif node.value in ('True', 'False'): - message = "comparison to False/True should be 'if cond is True:' or 'if cond:'" + message = "comparison to False/True should be " \ + "'if cond is True:' or 'if cond:'" self.add_issue(leaf, 712, message) break elif leaf.value in ('in', 'is'): @@ -680,6 +717,7 @@ class PEP8Normalizer(ErrorFinder): indentation = re.match(r'[ \t]*', line).group(0) start_pos = leaf.line + i, len(indentation) # TODO check multiline indentation. + start_pos elif typ == 'endmarker': if self._newline_count >= 2: self.add_issue(leaf, 391, 'Blank line at end of file') @@ -694,7 +732,7 @@ class PEP8Normalizer(ErrorFinder): return if code in (901, 903): # 901 and 903 are raised by the ErrorFinder. - super(PEP8Normalizer, self).add_issue(node, code, message) + super().add_issue(node, code, message) else: # Skip ErrorFinder here, because it has custom behavior. super(ErrorFinder, self).add_issue(node, code, message) @@ -718,7 +756,7 @@ class PEP8NormalizerConfig(ErrorFinderConfig): # TODO this is not yet ready. 
-#@PEP8Normalizer.register_rule(type='endmarker') +# @PEP8Normalizer.register_rule(type='endmarker') class BlankLineAtEnd(Rule): code = 392 message = 'Blank line at end of file' diff --git a/parso/python/prefix.py b/parso/python/prefix.py index b7f1e1b..c764c46 100644 --- a/parso/python/prefix.py +++ b/parso/python/prefix.py @@ -6,7 +6,7 @@ from parso.python.tokenize import group unicode_bom = BOM_UTF8.decode('utf-8') -class PrefixPart(object): +class PrefixPart: def __init__(self, leaf, typ, value, spacing='', start_pos=None): assert start_pos is not None self.parent = leaf @@ -71,7 +71,7 @@ def split_prefix(leaf, start_pos): value = spacing = '' bom = False while start != len(leaf.prefix): - match =_regex.match(leaf.prefix, start) + match = _regex.match(leaf.prefix, start) spacing = match.group(1) value = match.group(2) if not value: diff --git a/parso/python/token.py b/parso/python/token.py index bb86ec9..9b6f4c7 100644 --- a/parso/python/token.py +++ b/parso/python/token.py @@ -1,8 +1,13 @@ from __future__ import absolute_import +from enum import Enum -class TokenType(object): - def __init__(self, name, contains_syntax=False): + +class TokenType: + name: str + contains_syntax: bool + + def __init__(self, name: str, contains_syntax: bool = False): self.name = name self.contains_syntax = contains_syntax @@ -10,18 +15,17 @@ class TokenType(object): return '%s(%s)' % (self.__class__.__name__, self.name) -class TokenTypes(object): - """ - Basically an enum, but Python 2 doesn't have enums in the standard library. - """ - def __init__(self, names, contains_syntax): - for name in names: - setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax)) - - -PythonTokenTypes = TokenTypes(( - 'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT', - 'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP', - 'ENDMARKER'), - contains_syntax=('NAME', 'OP'), -) +class PythonTokenTypes(Enum): + STRING = TokenType('STRING') + NUMBER = TokenType('NUMBER') + NAME = TokenType('NAME', contains_syntax=True) + ERRORTOKEN = TokenType('ERRORTOKEN') + NEWLINE = TokenType('NEWLINE') + INDENT = TokenType('INDENT') + DEDENT = TokenType('DEDENT') + ERROR_DEDENT = TokenType('ERROR_DEDENT') + FSTRING_STRING = TokenType('FSTRING_STRING') + FSTRING_START = TokenType('FSTRING_START') + FSTRING_END = TokenType('FSTRING_END') + OP = TokenType('OP', contains_syntax=True) + ENDMARKER = TokenType('ENDMARKER') diff --git a/parso/python/token.pyi b/parso/python/token.pyi deleted file mode 100644 index 48e8dac..0000000 --- a/parso/python/token.pyi +++ /dev/null @@ -1,30 +0,0 @@ -from typing import Container, Iterable - -class TokenType: - name: str - contains_syntax: bool - def __init__(self, name: str, contains_syntax: bool) -> None: ... - -class TokenTypes: - def __init__( - self, names: Iterable[str], contains_syntax: Container[str] - ) -> None: ... - -# not an actual class in the source code, but we need this class to type the fields of -# PythonTokenTypes -class _FakePythonTokenTypesClass(TokenTypes): - STRING: TokenType - NUMBER: TokenType - NAME: TokenType - ERRORTOKEN: TokenType - NEWLINE: TokenType - INDENT: TokenType - DEDENT: TokenType - ERROR_DEDENT: TokenType - FSTRING_STRING: TokenType - FSTRING_START: TokenType - FSTRING_END: TokenType - OP: TokenType - ENDMARKER: TokenType - -PythonTokenTypes: _FakePythonTokenTypesClass = ... 
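
The token.py hunk above swaps the hand-rolled ``TokenTypes`` container (which existed only because Python 2 had no ``enum`` in the standard library) for a plain ``enum.Enum`` whose members wrap ``TokenType`` instances, and drops the ``token.pyi`` stub that faked those attributes for type checkers. A minimal sketch of the pattern, using illustrative names rather than parso's actual classes:

    from enum import Enum

    class TokenType:
        def __init__(self, name: str, contains_syntax: bool = False):
            self.name = name
            self.contains_syntax = contains_syntax

    class DemoTokenTypes(Enum):
        # Each member's value is a TokenType instance; lookup by name,
        # identity comparison and iteration all come from Enum for free.
        NAME = TokenType('NAME', contains_syntax=True)
        OP = TokenType('OP', contains_syntax=True)
        NUMBER = TokenType('NUMBER')

    assert DemoTokenTypes['NAME'] is DemoTokenTypes.NAME
    assert DemoTokenTypes.NAME.value.contains_syntax is True
    assert DemoTokenTypes.NUMBER.value.contains_syntax is False
    assert [t.name for t in DemoTokenTypes] == ['NAME', 'OP', 'NUMBER']

Because the members are now declared statically instead of being set in a loop, type checkers can see them directly, which is why the stub with ``_FakePythonTokenTypesClass`` can be deleted outright.
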
diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index fdcd8e0..6282275 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -13,12 +13,13 @@ from __future__ import absolute_import import sys import re -from collections import namedtuple import itertools as _itertools from codecs import BOM_UTF8 +from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \ + Pattern, Set from parso.python.token import PythonTokenTypes -from parso.utils import split_lines +from parso.utils import split_lines, PythonVersionInfo, parse_version_string # Maximum code point of Unicode 6.0: 0x10ffff (1,114,111) @@ -38,31 +39,23 @@ FSTRING_START = PythonTokenTypes.FSTRING_START FSTRING_STRING = PythonTokenTypes.FSTRING_STRING FSTRING_END = PythonTokenTypes.FSTRING_END -TokenCollection = namedtuple( - 'TokenCollection', - 'pseudo_token single_quoted triple_quoted endpats whitespace ' - 'fstring_pattern_map always_break_tokens', -) + +class TokenCollection(NamedTuple): + pseudo_token: Pattern + single_quoted: Set[str] + triple_quoted: Set[str] + endpats: Dict[str, Pattern] + whitespace: Pattern + fstring_pattern_map: Dict[str, str] + always_break_tokens: Tuple[str] + BOM_UTF8_STRING = BOM_UTF8.decode('utf-8') -_token_collection_cache = {} - -if sys.version_info.major >= 3: - # Python 3 has str.isidentifier() to check if a char is a valid identifier - is_identifier = str.isidentifier -else: - # Python 2 doesn't, but it's not that important anymore and if you tokenize - # Python 2 code with this, it's still ok. It's just that parsing Python 3 - # code with this function is not 100% correct. - # This just means that Python 2 code matches a few identifiers too much, - # but that doesn't really matter. - def is_identifier(s): - return True +_token_collection_cache: Dict[PythonVersionInfo, TokenCollection] = {} -def group(*choices, **kwargs): - capture = kwargs.pop('capture', False) # Python 2, arrghhhhh :( +def group(*choices, capture=False, **kwargs): assert not kwargs start = '(' @@ -76,19 +69,17 @@ def maybe(*choices): # Return the empty string, plus all of the valid string prefixes. -def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False): +def _all_string_prefixes(*, include_fstring=False, only_fstring=False): def different_case_versions(prefix): for s in _itertools.product(*[(c, c.upper()) for c in prefix]): yield ''.join(s) # The valid string prefixes. Only contain the lower case versions, # and don't contain any permuations (include 'fr', but not # 'rf'). The various permutations will be generated. - valid_string_prefixes = ['b', 'r', 'u'] - if version_info.major >= 3: - valid_string_prefixes.append('br') + valid_string_prefixes = ['b', 'r', 'u', 'br'] - result = set(['']) - if version_info >= (3, 6) and include_fstring: + result = {''} + if include_fstring: f = ['f', 'fr'] if only_fstring: valid_string_prefixes = f @@ -104,10 +95,6 @@ def _all_string_prefixes(version_info, include_fstring=False, only_fstring=False # create a list with upper and lower versions of each # character result.update(different_case_versions(t)) - if version_info.major == 2: - # In Python 2 the order cannot just be random. 
- result.update(different_case_versions('ur')) - result.update(different_case_versions('br')) return result @@ -136,53 +123,27 @@ def _create_token_collection(version_info): Whitespace = r'[ \f\t]*' whitespace = _compile(Whitespace) Comment = r'#[^\r\n]*' - # Python 2 is pretty much not working properly anymore, we just ignore - # parsing unicode properly, which is fine, I guess. - if version_info[0] == 2: - Name = r'([A-Za-z_0-9]+)' - elif sys.version_info[0] == 2: - # Unfortunately the regex engine cannot deal with the regex below, so - # just use this one. - Name = r'(\w+)' - else: - Name = u'([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)' + Name = '([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)' - if version_info >= (3, 6): - Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' - Binnumber = r'0[bB](?:_?[01])+' - Octnumber = r'0[oO](?:_?[0-7])+' - Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' - Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) - Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' - Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', - r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) - Expfloat = r'[0-9](?:_?[0-9])*' + Exponent - Floatnumber = group(Pointfloat, Expfloat) - Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') - else: - Hexnumber = r'0[xX][0-9a-fA-F]+' - Binnumber = r'0[bB][01]+' - if version_info.major >= 3: - Octnumber = r'0[oO][0-7]+' - else: - Octnumber = '0[oO]?[0-7]+' - Decnumber = r'(?:0+|[1-9][0-9]*)' - Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) - if version_info[0] < 3: - Intnumber += '[lL]?' - Exponent = r'[eE][-+]?[0-9]+' - Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) - Expfloat = r'[0-9]+' + Exponent - Floatnumber = group(Pointfloat, Expfloat) - Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') + Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' + Binnumber = r'0[bB](?:_?[01])+' + Octnumber = r'0[oO](?:_?[0-7])+' + Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' + Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) + Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' + Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', + r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) + Expfloat = r'[0-9](?:_?[0-9])*' + Exponent + Floatnumber = group(Pointfloat, Expfloat) + Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber) # Note that since _all_string_prefixes includes the empty string, # StringPrefix can be the empty string (making it optional). - possible_prefixes = _all_string_prefixes(version_info) + possible_prefixes = _all_string_prefixes() StringPrefix = group(*possible_prefixes) - StringPrefixWithF = group(*_all_string_prefixes(version_info, include_fstring=True)) - fstring_prefixes = _all_string_prefixes(version_info, include_fstring=True, only_fstring=True) + StringPrefixWithF = group(*_all_string_prefixes(include_fstring=True)) + fstring_prefixes = _all_string_prefixes(include_fstring=True, only_fstring=True) FStringStart = group(*fstring_prefixes) # Tail end of ' string. 
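
With the Python 2 branches gone, the numeric-literal regexes above collapse to the single Python 3.6+ form that accepts underscore group separators (PEP 515). A small self-contained check of those patterns; the four fragments are copied from the hunk above, while the test literals are illustrative:

    import re

    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
    Binnumber = r'0[bB](?:_?[01])+'
    Octnumber = r'0[oO](?:_?[0-7])+'
    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
    int_number = re.compile('|'.join([Hexnumber, Binnumber, Octnumber, Decnumber]))

    # Underscores are allowed between digit groups, as in CPython's tokenizer.
    for literal in ('42', '1_000_000', '0xDEAD_BEEF', '0b1010_0001', '0o7_7'):
        assert int_number.fullmatch(literal), literal

    # A leading zero followed by non-zero digits is still rejected.
    assert int_number.fullmatch('0123') is None
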
@@ -205,9 +166,7 @@ def _create_token_collection(version_info): Bracket = '[][(){}]' - special_args = [r'\r\n?', r'\n', r'[;.,@]'] - if version_info >= (3, 0): - special_args.insert(0, r'\.\.\.') + special_args = [r'\.\.\.', r'\r\n?', r'\n', r'[;.,@]'] if version_info >= (3, 8): special_args.insert(0, ":=?") else: @@ -258,9 +217,7 @@ def _create_token_collection(version_info): ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except', 'finally', 'while', 'with', 'return', 'continue', - 'break', 'del', 'pass', 'global', 'assert') - if version_info >= (3, 5): - ALWAYS_BREAK_TOKENS += ('nonlocal', ) + 'break', 'del', 'pass', 'global', 'assert', 'nonlocal') pseudo_token_compiled = _compile(PseudoToken) return TokenCollection( pseudo_token_compiled, single_quoted, triple_quoted, endpats, @@ -268,9 +225,14 @@ def _create_token_collection(version_info): ) -class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])): +class Token(NamedTuple): + type: PythonTokenTypes + string: str + start_pos: Tuple[int, int] + prefix: str + @property - def end_pos(self): + def end_pos(self) -> Tuple[int, int]: lines = split_lines(self.string) if len(lines) > 1: return self.start_pos[0] + len(lines) - 1, 0 @@ -284,7 +246,7 @@ class PythonToken(Token): self._replace(type=self.type.name)) -class FStringNode(object): +class FStringNode: def __init__(self, quote): self.quote = quote self.parentheses_count = 0 @@ -371,10 +333,12 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos): return string, new_pos -def tokenize(code, version_info, start_pos=(1, 0)): +def tokenize( + code: str, *, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0) +) -> Iterator[PythonToken]: """Generate tokens from a the source code (string).""" lines = split_lines(code, keepends=True) - return tokenize_lines(lines, version_info, start_pos=start_pos) + return tokenize_lines(lines, version_info=version_info, start_pos=start_pos) def _print_tokens(func): @@ -390,7 +354,14 @@ def _print_tokens(func): # @_print_tokens -def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first_token=True): +def tokenize_lines( + lines: Iterable[str], + *, + version_info: PythonVersionInfo, + indents: List[int] = None, + start_pos: Tuple[int, int] = (1, 0), + is_first_token=True, +) -> Iterator[PythonToken]: """ A heavily modified Python standard library tokenizer. @@ -416,7 +387,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first max_ = 0 numchars = '0123456789' contstr = '' - contline = None + contline: str + contstr_start: Tuple[int, int] + endprog: Pattern # We start with a newline. This makes indent at the first position # possible. It's not valid Python, but still better than an INDENT in the # second line (and not in the first). 
This makes quite a few things in @@ -425,7 +398,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first prefix = '' # Should never be required, but here for safety additional_prefix = '' lnum = start_pos[0] - 1 - fstring_stack = [] + fstring_stack: List[FStringNode] = [] for line in lines: # loop over lines in stream lnum += 1 pos = 0 @@ -444,14 +417,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first is_first_token = False if contstr: # continued string - endmatch = endprog.match(line) + endmatch = endprog.match(line) # noqa: F821 if endmatch: pos = endmatch.end(0) yield PythonToken( STRING, contstr + line[:pos], - contstr_start, prefix) + contstr_start, prefix) # noqa: F821 contstr = '' - contline = None + contline = '' else: contstr = contstr + line contline = contline + line @@ -528,14 +501,12 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first if indent_start > indents[-1]: yield PythonToken(INDENT, '', spos, '') indents.append(indent_start) - for t in dedent_if_necessary(indent_start): - yield t + yield from dedent_if_necessary(indent_start) if not pseudomatch: # scan for tokens match = whitespace.match(line, pos) if new_line and paren_level == 0 and not fstring_stack: - for t in dedent_if_necessary(match.end()): - yield t + yield from dedent_if_necessary(match.end()) pos = match.end() new_line = False yield PythonToken( @@ -556,18 +527,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0), indents=None, is_first # We only want to dedent if the token is on a new line. m = re.match(r'[ \f\t]*$', line[:start]) if m is not None: - for t in dedent_if_necessary(m.end()): - yield t - if is_identifier(token): + yield from dedent_if_necessary(m.end()) + if token.isidentifier(): yield PythonToken(NAME, token, spos, prefix) else: - for t in _split_illegal_unicode_name(token, spos, prefix): - yield t # yield from Python 2 + yield from _split_illegal_unicode_name(token, spos, prefix) elif initial in '\r\n': if any(not f.allow_multiline() for f in fstring_stack): - # Would use fstring_stack.clear, but that's not available - # in Python 2. 
- fstring_stack[:] = [] + fstring_stack.clear() if not new_line and paren_level == 0 and not fstring_stack: yield PythonToken(NEWLINE, token, spos, prefix) @@ -681,7 +648,7 @@ def _split_illegal_unicode_name(token, start_pos, prefix): pos = start_pos for i, char in enumerate(token): if is_illegal: - if is_identifier(char): + if char.isidentifier(): yield create_token() found = char is_illegal = False @@ -691,7 +658,7 @@ def _split_illegal_unicode_name(token, start_pos, prefix): found += char else: new_found = found + char - if is_identifier(new_found): + if new_found.isidentifier(): found = new_found else: if found: @@ -706,17 +673,9 @@ def _split_illegal_unicode_name(token, start_pos, prefix): if __name__ == "__main__": - if len(sys.argv) >= 2: - path = sys.argv[1] - with open(path) as f: - code = f.read() - else: - code = sys.stdin.read() + path = sys.argv[1] + with open(path) as f: + code = f.read() - from parso.utils import python_bytes_to_unicode, parse_version_string - - if isinstance(code, bytes): - code = python_bytes_to_unicode(code) - - for token in tokenize(code, parse_version_string()): + for token in tokenize(code, version_info=parse_version_string('3.10')): print(token) diff --git a/parso/python/tokenize.pyi b/parso/python/tokenize.pyi deleted file mode 100644 index 1870bc2..0000000 --- a/parso/python/tokenize.pyi +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Generator, Iterable, NamedTuple, Tuple - -from parso.python.token import TokenType -from parso.utils import PythonVersionInfo - -class Token(NamedTuple): - type: TokenType - string: str - start_pos: Tuple[int, int] - prefix: str - @property - def end_pos(self) -> Tuple[int, int]: ... - -class PythonToken(Token): - def __repr__(self) -> str: ... - -def tokenize( - code: str, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0) -) -> Generator[PythonToken, None, None]: ... -def tokenize_lines( - lines: Iterable[str], - version_info: PythonVersionInfo, - start_pos: Tuple[int, int] = (1, 0), -) -> Generator[PythonToken, None, None]: ... diff --git a/parso/python/tree.py b/parso/python/tree.py index 42fb9be..60696d0 100644 --- a/parso/python/tree.py +++ b/parso/python/tree.py @@ -1,5 +1,5 @@ """ -This is the syntax tree for Python syntaxes (2 & 3). The classes represent +This is the syntax tree for Python 3 syntaxes. The classes represent syntax elements like functions and imports. All of the nodes can be traced back to the `Python grammar file @@ -48,7 +48,6 @@ try: except ImportError: from collections import Mapping -from parso._compatibility import utf8_repr, unicode from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \ search_ancestor from parso.python.prefix import split_prefix @@ -69,7 +68,7 @@ _GET_DEFINITION_TYPES = set([ _IMPORTS = set(['import_name', 'import_from']) -class DocstringMixin(object): +class DocstringMixin: __slots__ = () def get_doc_node(self): @@ -97,7 +96,7 @@ class DocstringMixin(object): return None -class PythonMixin(object): +class PythonMixin: """ Some Python specific utilities. 
""" @@ -175,7 +174,6 @@ class EndMarker(_LeafWithoutNewlines): __slots__ = () type = 'endmarker' - @utf8_repr def __repr__(self): return "<%s: prefix=%s end_pos=%s>" % ( type(self).__name__, repr(self.prefix), self.end_pos @@ -187,7 +185,6 @@ class Newline(PythonLeaf): __slots__ = () type = 'newline' - @utf8_repr def __repr__(self): return "<%s: %s>" % (type(self).__name__, repr(self.value)) @@ -227,9 +224,6 @@ class Name(_LeafWithoutNewlines): return None if type_ == 'except_clause': - # TODO in Python 2 this doesn't work correctly. See grammar file. - # I think we'll just let it be. Python 2 will be gone in a few - # years. if self.get_previous_sibling() == 'as': return node.parent # The try_stmt. return None @@ -302,21 +296,17 @@ class FStringEnd(PythonLeaf): __slots__ = () -class _StringComparisonMixin(object): +class _StringComparisonMixin: def __eq__(self, other): """ Make comparisons with strings easy. Improves the readability of the parser. """ - if isinstance(other, (str, unicode)): + if isinstance(other, str): return self.value == other return self is other - def __ne__(self, other): - """Python 2 compatibility.""" - return not self.__eq__(other) - def __hash__(self): return hash(self.value) @@ -340,7 +330,7 @@ class Scope(PythonBaseNode, DocstringMixin): __slots__ = () def __init__(self, children): - super(Scope, self).__init__(children) + super().__init__(children) def iter_funcdefs(self): """ @@ -366,8 +356,7 @@ class Scope(PythonBaseNode, DocstringMixin): if element.type in names: yield element if element.type in _FUNC_CONTAINERS: - for e in scan(element.children): - yield e + yield from scan(element.children) return scan(self.children) @@ -397,7 +386,7 @@ class Module(Scope): type = 'file_input' def __init__(self, children): - super(Module, self).__init__(children) + super().__init__(children) self._used_names = None def _iter_future_import_names(self): @@ -416,18 +405,6 @@ class Module(Scope): if len(names) == 2 and names[0] == '__future__': yield names[1] - def _has_explicit_absolute_import(self): - """ - Checks if imports in this module are explicitly absolute, i.e. there - is a ``__future__`` import. - Currently not public, might be in the future. - :return bool: - """ - for name in self._iter_future_import_names(): - if name == 'absolute_import': - return True - return False - def get_used_names(self): """ Returns all the :class:`Name` leafs that exist in this module. This @@ -493,7 +470,7 @@ class Class(ClassOrFunc): __slots__ = () def __init__(self, children): - super(Class, self).__init__(children) + super().__init__(children) def get_super_arglist(self): """ @@ -520,24 +497,13 @@ def _create_params(parent, argslist_list): You could also say that this function replaces the argslist node with a list of Param objects. """ - def check_python2_nested_param(node): - """ - Python 2 allows params to look like ``def x(a, (b, c))``, which is - basically a way of unpacking tuples in params. Python 3 has ditched - this behavior. Jedi currently just ignores those constructs. - """ - return node.type == 'fpdef' and node.children[0] == '(' - try: first = argslist_list[0] except IndexError: return [] if first.type in ('name', 'fpdef'): - if check_python2_nested_param(first): - return [first] - else: - return [Param([first], parent)] + return [Param([first], parent)] elif first == '*': return [first] else: # argslist is a `typedargslist` or a `varargslist`. 
@@ -555,7 +521,6 @@ def _create_params(parent, argslist_list): if param_children[0] == '*' \ and (len(param_children) == 1 or param_children[1] == ',') \ - or check_python2_nested_param(param_children[0]) \ or param_children[0] == '/': for p in param_children: p.parent = parent @@ -583,7 +548,7 @@ class Function(ClassOrFunc): type = 'funcdef' def __init__(self, children): - super(Function, self).__init__(children) + super().__init__(children) parameters = self.children[2] # After `def foo` parameters.children[1:-1] = _create_params(parameters, parameters.children[1:-1]) @@ -618,8 +583,7 @@ class Function(ClassOrFunc): else: yield element else: - for result in scan(nested_children): - yield result + yield from scan(nested_children) return scan(self.children) @@ -633,8 +597,7 @@ class Function(ClassOrFunc): or element.type == 'keyword' and element.value == 'return': yield element if element.type in _RETURN_STMT_CONTAINERS: - for e in scan(element.children): - yield e + yield from scan(element.children) return scan(self.children) @@ -648,8 +611,7 @@ class Function(ClassOrFunc): or element.type == 'keyword' and element.value == 'raise': yield element if element.type in _RETURN_STMT_CONTAINERS: - for e in scan(element.children): - yield e + yield from scan(element.children) return scan(self.children) @@ -1101,8 +1063,7 @@ class ExprStmt(PythonBaseNode, DocstringMixin): first = first.children[2] yield first - for operator in self.children[3::2]: - yield operator + yield from self.children[3::2] class Param(PythonBaseNode): @@ -1114,7 +1075,7 @@ class Param(PythonBaseNode): type = 'param' def __init__(self, children, parent): - super(Param, self).__init__(children) + super().__init__(children) self.parent = parent for child in children: child.parent = self @@ -1214,7 +1175,7 @@ class Param(PythonBaseNode): :param include_comma bool: If enabled includes the comma in the string output. """ if include_comma: - return super(Param, self).get_code(include_prefix) + return super().get_code(include_prefix) children = self.children if children[-1] == ',': diff --git a/parso/tree.py b/parso/tree.py index b115197..311f2c0 100644 --- a/parso/tree.py +++ b/parso/tree.py @@ -1,7 +1,5 @@ -import sys from abc import abstractmethod, abstractproperty -from parso._compatibility import utf8_repr, encoding from parso.utils import split_lines @@ -20,12 +18,12 @@ def search_ancestor(node, *node_types): return node -class NodeOrLeaf(object): +class NodeOrLeaf: """ The base class for nodes and leaves. """ __slots__ = () - type = None + type: str ''' The type is a string that typically matches the types of the grammar file. ''' @@ -238,7 +236,6 @@ class Leaf(NodeOrLeaf): end_pos_column = len(lines[-1]) return end_pos_line, end_pos_column - @utf8_repr def __repr__(self): value = self.value if not value: @@ -250,7 +247,7 @@ class TypedLeaf(Leaf): __slots__ = ('type',) def __init__(self, type, value, start_pos, prefix=''): - super(TypedLeaf, self).__init__(value, start_pos, prefix) + super().__init__(value, start_pos, prefix) self.type = type @@ -260,7 +257,6 @@ class BaseNode(NodeOrLeaf): A node has children, a type and possibly a parent node. 
""" __slots__ = ('children', 'parent') - type = None def __init__(self, children): self.children = children @@ -315,7 +311,6 @@ class BaseNode(NodeOrLeaf): except AttributeError: return element - index = int((lower + upper) / 2) element = self.children[index] if position <= element.end_pos: @@ -333,11 +328,8 @@ class BaseNode(NodeOrLeaf): def get_last_leaf(self): return self.children[-1].get_last_leaf() - @utf8_repr def __repr__(self): code = self.get_code().replace('\n', ' ').replace('\r', ' ').strip() - if not sys.version_info.major >= 3: - code = code.encode(encoding, 'replace') return "<%s: %s@%s,%s>" % \ (type(self).__name__, code, self.start_pos[0], self.start_pos[1]) @@ -347,7 +339,7 @@ class Node(BaseNode): __slots__ = ('type',) def __init__(self, type, children): - super(Node, self).__init__(children) + super().__init__(children) self.type = type def __repr__(self): @@ -373,7 +365,7 @@ class ErrorLeaf(Leaf): type = 'error_leaf' def __init__(self, token_type, value, start_pos, prefix=''): - super(ErrorLeaf, self).__init__(value, start_pos, prefix) + super().__init__(value, start_pos, prefix) self.token_type = token_type def __repr__(self): diff --git a/parso/utils.py b/parso/utils.py index 99a1307..e3d9038 100644 --- a/parso/utils.py +++ b/parso/utils.py @@ -1,30 +1,32 @@ -from collections import namedtuple import re import sys from ast import literal_eval from functools import total_ordering - -from parso._compatibility import unicode +from typing import NamedTuple, Sequence, Union # The following is a list in Python that are line breaks in str.splitlines, but # not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed, # 0xA) are allowed to split lines. _NON_LINE_BREAKS = ( - u'\v', # Vertical Tabulation 0xB - u'\f', # Form Feed 0xC - u'\x1C', # File Separator - u'\x1D', # Group Separator - u'\x1E', # Record Separator - u'\x85', # Next Line (NEL - Equivalent to CR+LF. - # Used to mark end-of-line on some IBM mainframes.) - u'\u2028', # Line Separator - u'\u2029', # Paragraph Separator + '\v', # Vertical Tabulation 0xB + '\f', # Form Feed 0xC + '\x1C', # File Separator + '\x1D', # Group Separator + '\x1E', # Record Separator + '\x85', # Next Line (NEL - Equivalent to CR+LF. + # Used to mark end-of-line on some IBM mainframes.) + '\u2028', # Line Separator + '\u2029', # Paragraph Separator ) -Version = namedtuple('Version', 'major, minor, micro') + +class Version(NamedTuple): + major: int + minor: int + micro: int -def split_lines(string, keepends=False): +def split_lines(string: str, keepends: bool = False) -> Sequence[str]: r""" Intended for Python code. In contrast to Python's :py:meth:`str.splitlines`, looks at form feeds and other special characters as normal text. Just @@ -68,7 +70,9 @@ def split_lines(string, keepends=False): return re.split(r'\n|\r\n|\r', string) -def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'): +def python_bytes_to_unicode( + source: Union[str, bytes], encoding: str = 'utf-8', errors: str = 'strict' +) -> str: """ Checks for unicode BOMs and PEP 263 encoding declarations. Then returns a unicode object like in :py:meth:`bytes.decode`. 
@@ -92,33 +96,33 @@ def python_bytes_to_unicode(source, encoding='utf-8', errors='strict'): possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)", first_two_lines) if possible_encoding: - return possible_encoding.group(1) + e = possible_encoding.group(1) + if not isinstance(e, str): + e = str(e, 'ascii', 'replace') + return e else: # the default if nothing else has been set -> PEP 263 return encoding - if isinstance(source, unicode): + if isinstance(source, str): # only cast str/bytes return source encoding = detect_encoding() - if not isinstance(encoding, unicode): - encoding = unicode(encoding, 'utf-8', 'replace') - try: # Cast to unicode - return unicode(source, encoding, errors) + return str(source, encoding, errors) except LookupError: if errors == 'replace': # This is a weird case that can happen if the given encoding is not # a valid encoding. This usually shouldn't happen with provided # encodings, but can happen if somebody uses encoding declarations # like `# coding: foo-8`. - return unicode(source, 'utf-8', errors) + return str(source, 'utf-8', errors) raise -def version_info(): +def version_info() -> Version: """ Returns a namedtuple of parso's version, similar to Python's ``sys.version_info``. @@ -128,7 +132,34 @@ def version_info(): return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)]) -def _parse_version(version): +class _PythonVersionInfo(NamedTuple): + major: int + minor: int + + +@total_ordering +class PythonVersionInfo(_PythonVersionInfo): + def __gt__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) > other + super().__gt__(other) + + return (self.major, self.minor) + + def __eq__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) == other + super().__eq__(other) + + def __ne__(self, other): + return not self.__eq__(other) + + +def _parse_version(version) -> PythonVersionInfo: match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version) if match is None: raise ValueError('The given version is not in the right format. ' @@ -149,37 +180,15 @@ def _parse_version(version): return PythonVersionInfo(major, minor) -@total_ordering -class PythonVersionInfo(namedtuple('Version', 'major, minor')): - def __gt__(self, other): - if isinstance(other, tuple): - if len(other) != 2: - raise ValueError("Can only compare to tuples of length 2.") - return (self.major, self.minor) > other - super(PythonVersionInfo, self).__gt__(other) - - return (self.major, self.minor) - - def __eq__(self, other): - if isinstance(other, tuple): - if len(other) != 2: - raise ValueError("Can only compare to tuples of length 2.") - return (self.major, self.minor) == other - super(PythonVersionInfo, self).__eq__(other) - - def __ne__(self, other): - return not self.__eq__(other) - - -def parse_version_string(version=None): +def parse_version_string(version: str = None) -> PythonVersionInfo: """ - Checks for a valid version number (e.g. `3.8` or `2.7.1` or `3`) and + Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and returns a corresponding version info that is always two characters long in decimal. 
""" if version is None: version = '%s.%s' % sys.version_info[:2] - if not isinstance(version, (unicode, str)): + if not isinstance(version, str): raise TypeError('version must be a string like "3.8"') return _parse_version(version) diff --git a/parso/utils.pyi b/parso/utils.pyi deleted file mode 100644 index 12c77da..0000000 --- a/parso/utils.pyi +++ /dev/null @@ -1,29 +0,0 @@ -from typing import NamedTuple, Optional, Sequence, Union - -class Version(NamedTuple): - major: int - minor: int - micro: int - -def split_lines(string: str, keepends: bool = ...) -> Sequence[str]: ... -def python_bytes_to_unicode( - source: Union[str, bytes], encoding: str = ..., errors: str = ... -) -> str: ... -def version_info() -> Version: - """ - Returns a namedtuple of parso's version, similar to Python's - ``sys.version_info``. - """ - ... - -class PythonVersionInfo(NamedTuple): - major: int - minor: int - -def parse_version_string(version: Optional[str]) -> PythonVersionInfo: - """ - Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and - returns a corresponding version info that is always two characters long in - decimal. - """ - ... diff --git a/scripts/diff_parser_profile.py b/scripts/diff_parser_profile.py index a152a3e..93a1202 100755 --- a/scripts/diff_parser_profile.py +++ b/scripts/diff_parser_profile.py @@ -18,7 +18,6 @@ from docopt import docopt from jedi.parser.python import load_grammar from jedi.parser.diff import DiffParser from jedi.parser.python import ParserWithRecovery -from jedi._compatibility import u from jedi.common import splitlines import jedi @@ -37,14 +36,15 @@ def main(args): with open(args['']) as f: code = f.read() grammar = load_grammar() - parser = ParserWithRecovery(grammar, u(code)) + parser = ParserWithRecovery(grammar, code) # Make sure used_names is loaded parser.module.used_names - code = code + '\na\n' # Add something so the diff parser needs to run. + code = code + '\na\n' # Add something so the diff parser needs to run. 
lines = splitlines(code, keepends=True) cProfile.runctx('run(parser, lines)', globals(), locals(), sort=args['-s']) + if __name__ == '__main__': args = docopt(__doc__) main(args) diff --git a/setup.cfg b/setup.cfg index 1295389..433824a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,3 +10,16 @@ ignore = E226, # line break before binary operator W503, + + +[mypy] +disallow_subclassing_any = True + +# Avoid creating future gotchas emerging from bad typing +warn_redundant_casts = True +warn_unused_ignores = True +warn_return_any = True +warn_unused_configs = True +warn_unreachable = True + +strict_equality = True diff --git a/setup.py b/setup.py index a714888..0ead19c 100755 --- a/setup.py +++ b/setup.py @@ -12,44 +12,47 @@ __AUTHOR_EMAIL__ = 'davidhalter88@gmail.com' readme = open('README.rst').read() + '\n\n' + open('CHANGELOG.rst').read() -setup(name='parso', - version=parso.__version__, - description='A Python Parser', - author=__AUTHOR__, - author_email=__AUTHOR_EMAIL__, - include_package_data=True, - maintainer=__AUTHOR__, - maintainer_email=__AUTHOR_EMAIL__, - url='https://github.com/davidhalter/parso', - license='MIT', - keywords='python parser parsing', - long_description=readme, - packages=find_packages(exclude=['test']), - package_data={'parso': ['python/grammar*.txt', 'py.typed', '*.pyi', '**/*.pyi']}, - platforms=['any'], - python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*', - classifiers=[ - 'Development Status :: 4 - Beta', - 'Environment :: Plugins', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'Topic :: Text Editors :: Integrated Development Environments (IDE)', - 'Topic :: Utilities', - 'Typing :: Typed', - ], - extras_require={ - 'testing': [ - 'pytest>=3.0.7', - 'docopt', - ], - }, - ) +setup( + name='parso', + version=parso.__version__, + description='A Python Parser', + author=__AUTHOR__, + author_email=__AUTHOR_EMAIL__, + include_package_data=True, + maintainer=__AUTHOR__, + maintainer_email=__AUTHOR_EMAIL__, + url='https://github.com/davidhalter/parso', + license='MIT', + keywords='python parser parsing', + long_description=readme, + packages=find_packages(exclude=['test']), + package_data={'parso': ['python/grammar*.txt', 'py.typed', '*.pyi', '**/*.pyi']}, + platforms=['any'], + python_requires='>=3.6', + classifiers=[ + 'Development Status :: 4 - Beta', + 'Environment :: Plugins', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Text Editors :: Integrated Development Environments (IDE)', + 'Topic :: Utilities', + 'Typing :: Typed', + ], + extras_require={ + 'testing': [ + 'pytest<6.0.0', + 'docopt', + ], + 'qa': [ + 'flake8==3.8.3', + 'mypy==0.782', + ], + }, +) diff --git a/test/failing_examples.py b/test/failing_examples.py index 2c67cb7..e58b0a0 100644 --- 
a/test/failing_examples.py +++ b/test/failing_examples.py @@ -34,7 +34,7 @@ FAILING_EXAMPLES = [ 'lambda x=3, y: x', '__debug__ = 1', 'with x() as __debug__: pass', - # Mostly 3.6 relevant + '[]: int', '[a, b]: int', '(): int', @@ -56,6 +56,7 @@ FAILING_EXAMPLES = [ 'a, b += 3', '(a, b) += 3', '[a, b] += 3', + '[a, 1] += 3', 'f() += 1', 'lambda x:None+=1', '{} += 1', @@ -130,6 +131,8 @@ FAILING_EXAMPLES = [ r"u'\N{foo}'", r'b"\x"', r'b"\"', + 'b"ä"', + '*a, *b = 3, 3', 'async def foo(): yield from []', 'yield from []', @@ -138,6 +141,16 @@ FAILING_EXAMPLES = [ 'def x(*): pass', '(%s *d) = x' % ('a,' * 256), '{**{} for a in [1]}', + '(True,) = x', + '([False], a) = x', + 'def x(): from math import *', + + # str/bytes combinations + '"s" b""', + '"s" b"" ""', + 'b"" "" b"" ""', + 'f"s" b""', + 'b"s" f""', # Parser/tokenize.c r'"""', @@ -176,9 +189,17 @@ FAILING_EXAMPLES = [ "f'{1;1}'", "f'{a;}'", "f'{b\"\" \"\"}'", -] + # f-string expression part cannot include a backslash + r'''f"{'\n'}"''', -GLOBAL_NONLOCAL_ERROR = [ + 'async def foo():\n yield x\n return 1', + 'async def foo():\n yield x\n return 1', + + '[*[] for a in [1]]', + 'async def bla():\n def x(): await bla()', + 'del None', + + # Errors of global / nonlocal dedent(''' def glob(): x = 3 @@ -277,65 +298,6 @@ GLOBAL_NONLOCAL_ERROR = [ '''), ] -if sys.version_info >= (3, 6): - FAILING_EXAMPLES += GLOBAL_NONLOCAL_ERROR -if sys.version_info >= (3, 5): - FAILING_EXAMPLES += [ - # Raises different errors so just ignore them for now. - '[*[] for a in [1]]', - # Raises multiple errors in previous versions. - 'async def bla():\n def x(): await bla()', - ] -if sys.version_info >= (3, 4): - # Before that del None works like del list, it gives a NameError. - FAILING_EXAMPLES.append('del None') -if sys.version_info >= (3,): - FAILING_EXAMPLES += [ - # Unfortunately assigning to False and True do not raise an error in - # 2.x. - '(True,) = x', - '([False], a) = x', - # A symtable error that raises only a SyntaxWarning in Python 2. - 'def x(): from math import *', - # unicode chars in bytes are allowed in python 2 - 'b"ä"', - # combining strings and unicode is allowed in Python 2. - '"s" b""', - '"s" b"" ""', - 'b"" "" b"" ""', - ] -if sys.version_info >= (3, 6): - FAILING_EXAMPLES += [ - # Same as above, but for f-strings. - 'f"s" b""', - 'b"s" f""', - - # f-string expression part cannot include a backslash - r'''f"{'\n'}"''', - ] -FAILING_EXAMPLES.append('[a, 1] += 3') - -if sys.version_info[:2] == (3, 5): - # yields are not allowed in 3.5 async functions. Therefore test them - # separately, here. - FAILING_EXAMPLES += [ - 'async def foo():\n yield x', - 'async def foo():\n yield x', - ] -else: - FAILING_EXAMPLES += [ - 'async def foo():\n yield x\n return 1', - 'async def foo():\n yield x\n return 1', - ] - - -if sys.version_info[:2] <= (3, 4): - # Python > 3.4 this is valid code. - FAILING_EXAMPLES += [ - 'a = *[1], 2', - '(*[1], 2)', - ] - if sys.version_info[:2] >= (3, 7): # This is somehow ok in previous versions. FAILING_EXAMPLES += [ diff --git a/test/fuzz_diff_parser.py b/test/fuzz_diff_parser.py index 6137ab6..39b93f2 100644 --- a/test/fuzz_diff_parser.py +++ b/test/fuzz_diff_parser.py @@ -135,11 +135,11 @@ class FileModification: # We cannot delete every line, that doesn't make sense to # fuzz and it would be annoying to rewrite everything here. 
continue - l = LineDeletion(random_line()) + ld = LineDeletion(random_line()) elif rand == 2: # Copy / Insertion # Make it possible to insert into the first and the last line - l = LineCopy(random_line(), random_line(include_end=True)) + ld = LineCopy(random_line(), random_line(include_end=True)) elif rand in (3, 4): # Modify a line in some weird random ways. line_nr = random_line() @@ -166,9 +166,9 @@ class FileModification: # we really replace the line with something that has # indentation. line = ' ' * random.randint(0, 12) + random_string + '\n' - l = LineReplacement(line_nr, line) - l.apply(lines) - yield l + ld = LineReplacement(line_nr, line) + ld.apply(lines) + yield ld def __init__(self, modification_list, check_original): self.modification_list = modification_list diff --git a/test/normalizer_issue_files/allowed_syntax.py b/test/normalizer_issue_files/allowed_syntax.py index a73b84c..88df565 100644 --- a/test/normalizer_issue_files/allowed_syntax.py +++ b/test/normalizer_issue_files/allowed_syntax.py @@ -44,3 +44,75 @@ a = 3 def x(b=a): global a + + +*foo, a = (1,) +*foo[0], a = (1,) +*[], a = (1,) + + +async def foo(): + await bar() + #: E901 + yield from [] + return + #: E901 + return '' + + +# With decorator it's a different statement. +@bla +async def foo(): + await bar() + #: E901 + yield from [] + return + #: E901 + return '' + + +foo: int = 4 +(foo): int = 3 +((foo)): int = 3 +foo.bar: int +foo[3]: int + + +def glob(): + global x + y: foo = x + + +def c(): + a = 3 + + def d(): + class X(): + nonlocal a + + +def x(): + a = 3 + + def y(): + nonlocal a + + +def x(): + def y(): + nonlocal a + + a = 3 + + +def x(): + a = 3 + + def y(): + class z(): + nonlocal a + + +a = *args, *args +error[(*args, *args)] = 3 +*args, *args diff --git a/test/normalizer_issue_files/allowed_syntax_python2.py b/test/normalizer_issue_files/allowed_syntax_python2.py deleted file mode 100644 index 81736bc..0000000 --- a/test/normalizer_issue_files/allowed_syntax_python2.py +++ /dev/null @@ -1,2 +0,0 @@ -'s' b'' -u's' b'ä' diff --git a/test/normalizer_issue_files/allowed_syntax_python3.4.py b/test/normalizer_issue_files/allowed_syntax_python3.4.py deleted file mode 100644 index 1759575..0000000 --- a/test/normalizer_issue_files/allowed_syntax_python3.4.py +++ /dev/null @@ -1,3 +0,0 @@ -*foo, a = (1,) -*foo[0], a = (1,) -*[], a = (1,) diff --git a/test/normalizer_issue_files/allowed_syntax_python3.5.py b/test/normalizer_issue_files/allowed_syntax_python3.5.py deleted file mode 100644 index cc0385b..0000000 --- a/test/normalizer_issue_files/allowed_syntax_python3.5.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Mostly allowed syntax in Python 3.5. -""" - - -async def foo(): - await bar() - #: E901 - yield from [] - return - #: E901 - return '' - - -# With decorator it's a different statement. 
-@bla -async def foo(): - await bar() - #: E901 - yield from [] - return - #: E901 - return '' diff --git a/test/normalizer_issue_files/allowed_syntax_python3.6.py b/test/normalizer_issue_files/allowed_syntax_python3.6.py deleted file mode 100644 index 1bbe071..0000000 --- a/test/normalizer_issue_files/allowed_syntax_python3.6.py +++ /dev/null @@ -1,45 +0,0 @@ -foo: int = 4 -(foo): int = 3 -((foo)): int = 3 -foo.bar: int -foo[3]: int - - -def glob(): - global x - y: foo = x - - -def c(): - a = 3 - - def d(): - class X(): - nonlocal a - - -def x(): - a = 3 - - def y(): - nonlocal a - - -def x(): - def y(): - nonlocal a - - a = 3 - - -def x(): - a = 3 - - def y(): - class z(): - nonlocal a - - -a = *args, *args -error[(*args, *args)] = 3 -*args, *args diff --git a/test/normalizer_issue_files/python3.py b/test/normalizer_issue_files/python.py similarity index 100% rename from test/normalizer_issue_files/python3.py rename to test/normalizer_issue_files/python.py diff --git a/test/normalizer_issue_files/python2.7.py b/test/normalizer_issue_files/python2.7.py deleted file mode 100644 index 5d10739..0000000 --- a/test/normalizer_issue_files/python2.7.py +++ /dev/null @@ -1,14 +0,0 @@ -import sys - -print 1, 2 >> sys.stdout - - -foo = ur'This is not possible in Python 3.' - -# This is actually printing a tuple. -#: E275:5 -print(1, 2) - -# True and False are not keywords in Python 2 and therefore there's no need for -# a space. -norman = True+False diff --git a/test/test_absolute_import.py b/test/test_absolute_import.py deleted file mode 100644 index c959ea5..0000000 --- a/test/test_absolute_import.py +++ /dev/null @@ -1,29 +0,0 @@ -""" -Tests ``from __future__ import absolute_import`` (only important for -Python 2.X) -""" -from parso import parse - - -def test_explicit_absolute_imports(): - """ - Detect modules with ``from __future__ import absolute_import``. - """ - module = parse("from __future__ import absolute_import") - assert module._has_explicit_absolute_import() - - -def test_no_explicit_absolute_imports(): - """ - Detect modules without ``from __future__ import absolute_import``. - """ - assert not parse("1")._has_explicit_absolute_import() - - -def test_dont_break_imports_without_namespaces(): - """ - The code checking for ``from __future__ import absolute_import`` shouldn't - assume that all imports have non-``None`` namespaces. - """ - src = "from __future__ import absolute_import\nimport xyzzy" - assert parse(src)._has_explicit_absolute_import() diff --git a/test/test_cache.py b/test/test_cache.py index e1a0a9e..bfdbaf5 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -3,17 +3,16 @@ Test all things related to the ``jedi.cache`` module. """ import os -import os.path - import pytest import time +from pathlib import Path from parso.cache import (_CACHED_FILE_MAXIMUM_SURVIVAL, _VERSION_TAG, - _get_cache_clear_lock, _get_hashed_path, + _get_cache_clear_lock_path, _get_hashed_path, _load_from_file_system, _NodeCacheItem, _remove_cache_and_update_lock, _save_to_file_system, load_module, parser_cache, try_to_save_module) -from parso._compatibility import is_pypy, PermissionError +from parso._compatibility import is_pypy from parso import load_grammar from parso import cache from parso import file_io @@ -30,9 +29,8 @@ skip_pypy = pytest.mark.skipif( def isolated_parso_cache(monkeypatch, tmpdir): """Set `parso.cache._default_cache_path` to a temporary directory during the test. 
""" - cache_path = str(os.path.join(str(tmpdir), "__parso_cache")) + cache_path = Path(str(tmpdir), "__parso_cache") monkeypatch.setattr(cache, '_default_cache_path', cache_path) - monkeypatch.setattr(cache, '_get_default_cache_path', lambda *args, **kwargs: cache_path) return cache_path @@ -42,13 +40,13 @@ def test_modulepickling_change_cache_dir(tmpdir): See: `#168 `_ """ - dir_1 = str(tmpdir.mkdir('first')) - dir_2 = str(tmpdir.mkdir('second')) + dir_1 = Path(str(tmpdir.mkdir('first'))) + dir_2 = Path(str(tmpdir.mkdir('second'))) item_1 = _NodeCacheItem('bla', []) item_2 = _NodeCacheItem('bla', []) - path_1 = 'fake path 1' - path_2 = 'fake path 2' + path_1 = Path('fake path 1') + path_2 = Path('fake path 2') hashed_grammar = load_grammar()._hashed _save_to_file_system(hashed_grammar, path_1, item_1, cache_path=dir_1) @@ -81,12 +79,12 @@ def test_modulepickling_simulate_deleted_cache(tmpdir): way. __ https://developer.apple.com/library/content/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html - """ + """ # noqa grammar = load_grammar() module = 'fake parser' # Create the file - path = tmpdir.dirname + '/some_path' + path = Path(str(tmpdir.dirname), 'some_path') with open(path, 'w'): pass io = file_io.FileIO(path) @@ -124,7 +122,7 @@ def test_cache_limit(): class _FixedTimeFileIO(file_io.KnownContentFileIO): def __init__(self, path, content, last_modified): - super(_FixedTimeFileIO, self).__init__(path, content) + super().__init__(path, content) self._last_modified = last_modified def get_last_modified(self): @@ -134,7 +132,7 @@ class _FixedTimeFileIO(file_io.KnownContentFileIO): @pytest.mark.parametrize('diff_cache', [False, True]) @pytest.mark.parametrize('use_file_io', [False, True]) def test_cache_last_used_update(diff_cache, use_file_io): - p = '/path/last-used' + p = Path('/path/last-used') parser_cache.clear() # Clear, because then it's easier to find stuff. 
parse('somecode', cache=True, path=p) node_cache_item = next(iter(parser_cache.values()))[p] @@ -157,21 +155,21 @@ def test_inactive_cache(tmpdir, isolated_parso_cache): test_subjects = "abcdef" for path in test_subjects: parse('somecode', cache=True, path=os.path.join(str(tmpdir), path)) - raw_cache_path = os.path.join(isolated_parso_cache, _VERSION_TAG) - assert os.path.exists(raw_cache_path) - paths = os.listdir(raw_cache_path) + raw_cache_path = isolated_parso_cache.joinpath(_VERSION_TAG) + assert raw_cache_path.exists() + dir_names = os.listdir(raw_cache_path) a_while_ago = time.time() - _CACHED_FILE_MAXIMUM_SURVIVAL old_paths = set() - for path in paths[:len(test_subjects) // 2]: # make certain number of paths old - os.utime(os.path.join(raw_cache_path, path), (a_while_ago, a_while_ago)) - old_paths.add(path) + for dir_name in dir_names[:len(test_subjects) // 2]: # make certain number of paths old + os.utime(raw_cache_path.joinpath(dir_name), (a_while_ago, a_while_ago)) + old_paths.add(dir_name) # nothing should be cleared while the lock is on - assert os.path.exists(_get_cache_clear_lock().path) + assert _get_cache_clear_lock_path().exists() _remove_cache_and_update_lock() # it shouldn't clear anything assert len(os.listdir(raw_cache_path)) == len(test_subjects) assert old_paths.issubset(os.listdir(raw_cache_path)) - os.utime(_get_cache_clear_lock().path, (a_while_ago, a_while_ago)) + os.utime(_get_cache_clear_lock_path(), (a_while_ago, a_while_ago)) _remove_cache_and_update_lock() assert len(os.listdir(raw_cache_path)) == len(test_subjects) // 2 assert not old_paths.intersection(os.listdir(raw_cache_path)) @@ -180,12 +178,13 @@ def test_inactive_cache(tmpdir, isolated_parso_cache): @skip_pypy def test_permission_error(monkeypatch): def save(*args, **kwargs): - was_called[0] = True # Python 2... Use nonlocal instead + nonlocal was_called + was_called = True raise PermissionError - was_called = [False] + was_called = False monkeypatch.setattr(cache, '_save_to_file_system', save) with pytest.warns(Warning): parse(path=__file__, cache=True, diff_cache=True) - assert was_called[0] + assert was_called diff --git a/test/test_diff_parser.py b/test/test_diff_parser.py index 1904314..222236e 100644 --- a/test/test_diff_parser.py +++ b/test/test_diff_parser.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from textwrap import dedent import logging -import sys import pytest @@ -39,7 +38,7 @@ def _check_error_leaves_nodes(node): return None -class Differ(object): +class Differ: grammar = load_grammar() def initialize(self, code): @@ -934,7 +933,6 @@ def test_many_nested_ifs(differ): differ.parse(code1, parsers=1, copies=1) -@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5") @pytest.mark.parametrize('prefix', ['', 'async ']) def test_with_and_funcdef_in_call(differ, prefix): code1 = prefix + dedent('''\ @@ -973,17 +971,16 @@ def test_random_unicode_characters(differ): Those issues were all found with the fuzzer. 
""" differ.initialize('') - differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, + differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True) - differ.parse(u'\r\r', parsers=1) - differ.parse(u"˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True) - differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1, - expect_error_leaves=sys.version_info[0] == 2) + differ.parse('\r\r', parsers=1) + differ.parse("˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True) + differ.parse('a\ntaǁ\rGĒōns__\n\nb', parsers=1) s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):' differ.parse(s, parsers=1, expect_error_leaves=True) differ.parse('') differ.parse(s + '\n', parsers=1, expect_error_leaves=True) - differ.parse(u' result = (\r\f\x17\t\x11res)', parsers=1, expect_error_leaves=True) + differ.parse(' result = (\r\f\x17\t\x11res)', parsers=1, expect_error_leaves=True) differ.parse('') differ.parse(' a( # xx\ndef', parsers=1, expect_error_leaves=True) @@ -996,7 +993,7 @@ def test_dedent_end_positions(differ): c = { 5} ''') - code2 = dedent(u'''\ + code2 = dedent('''\ if 1: if ⌟ഒᜈྡྷṭb: 2 @@ -1269,7 +1266,6 @@ def test_some_weird_removals(differ): differ.parse(code1, copies=1) -@pytest.mark.skipif(sys.version_info < (3, 5), reason="Async starts working in 3.5") def test_async_copy(differ): code1 = dedent('''\ async def main(): @@ -1340,7 +1336,7 @@ def test_backslash_issue(differ): pre = ( '') \\if - ''') + ''') # noqa differ.initialize(code1) differ.parse(code2, parsers=1, copies=1, expect_error_leaves=True) differ.parse(code1, parsers=1, copies=1) @@ -1420,7 +1416,7 @@ def test_with_formfeed(differ): \x0cimport return return '' - ''') + ''') # noqa differ.initialize(code1) differ.parse(code2, parsers=ANY, copies=ANY, expect_error_leaves=True) @@ -1588,14 +1584,14 @@ def test_byte_order_mark(differ): def test_byte_order_mark2(differ): - code = u'\ufeff# foo' + code = '\ufeff# foo' differ.initialize(code) differ.parse(code + 'x', parsers=ANY) def test_byte_order_mark3(differ): - code1 = u"\ufeff#\ny\n" - code2 = u'x\n\ufeff#\n\ufeff#\ny\n' + code1 = "\ufeff#\ny\n" + code2 = 'x\n\ufeff#\n\ufeff#\ny\n' differ.initialize(code1) differ.parse(code2, expect_error_leaves=True, parsers=ANY, copies=ANY) differ.parse(code1, parsers=1) diff --git a/test/test_error_recovery.py b/test/test_error_recovery.py index d0d3f7b..87efd47 100644 --- a/test/test_error_recovery.py +++ b/test/test_error_recovery.py @@ -74,7 +74,7 @@ def test_invalid_token(): def test_invalid_token_in_fstr(): - module = load_grammar(version='3.6').parse('f"{a + ? + b}"') + module = load_grammar(version='3.9').parse('f"{a + ? + b}"') error_node, q, plus_b, error1, error2, endmarker = module.children assert error_node.get_code() == 'f"{a +' assert q.value == '?' 
diff --git a/test/test_normalizer_issues_files.py b/test/test_normalizer_issues_files.py index 2aea1da..0427313 100644 --- a/test/test_normalizer_issues_files.py +++ b/test/test_normalizer_issues_files.py @@ -12,7 +12,7 @@ from parso.utils import python_bytes_to_unicode @total_ordering -class WantedIssue(object): +class WantedIssue: def __init__(self, code, line, column): self.code = code self._line = line @@ -42,9 +42,9 @@ def collect_errors(code): column = int(add_indent or len(match.group(1))) code, _, add_line = code.partition('+') - l = line_nr + 1 + int(add_line or 0) + ln = line_nr + 1 + int(add_line or 0) - yield WantedIssue(code[1:], l, column) + yield WantedIssue(code[1:], ln, column) def test_normalizer_issue(normalizer_issue_case): diff --git a/test/test_old_fast_parser.py b/test/test_old_fast_parser.py index 7e12a03..6f332cf 100644 --- a/test/test_old_fast_parser.py +++ b/test/test_old_fast_parser.py @@ -8,12 +8,11 @@ However the tests might still be relevant for the parser. from textwrap import dedent -from parso._compatibility import u from parso import parse def test_carriage_return_splitting(): - source = u(dedent(''' + source = dedent(''' @@ -21,7 +20,7 @@ def test_carriage_return_splitting(): class Foo(): pass - ''')) + ''') source = source.replace('\n', '\r\n') module = parse(source) assert [n.value for lst in module.get_used_names().values() for n in lst] == ['Foo'] @@ -136,7 +135,7 @@ def test_wrong_indentation(): b a """) - #check_p(src, 1) + check_p(src, 1) src = dedent("""\ def complex(): diff --git a/test/test_param_splitting.py b/test/test_param_splitting.py index f04fea7..3ea5f16 100644 --- a/test/test_param_splitting.py +++ b/test/test_param_splitting.py @@ -8,13 +8,13 @@ from textwrap import dedent from parso import parse -def assert_params(param_string, version=None, **wanted_dct): +def assert_params(param_string, **wanted_dct): source = dedent(''' def x(%s): pass ''') % param_string - module = parse(source, version=version) + module = parse(source) funcdef = next(module.iter_funcdefs()) dct = dict((p.name.value, p.default and p.default.get_code()) for p in funcdef.get_params()) @@ -23,23 +23,23 @@ def assert_params(param_string, version=None, **wanted_dct): def test_split_params_with_separation_star(): - assert_params(u'x, y=1, *, z=3', x=None, y='1', z='3', version='3.5') - assert_params(u'*, x', x=None, version='3.5') - assert_params(u'*', version='3.5') + assert_params('x, y=1, *, z=3', x=None, y='1', z='3') + assert_params('*, x', x=None) + assert_params('*') def test_split_params_with_stars(): - assert_params(u'x, *args', x=None, args=None) - assert_params(u'**kwargs', kwargs=None) - assert_params(u'*args, **kwargs', args=None, kwargs=None) + assert_params('x, *args', x=None, args=None) + assert_params('**kwargs', kwargs=None) + assert_params('*args, **kwargs', args=None, kwargs=None) -def test_kw_only_no_kw(works_ge_py3): +def test_kw_only_no_kw(works_in_py): """ Parsing this should be working. In CPython the parser also parses this and in a later step the AST complains. 
""" - module = works_ge_py3.parse('def test(arg, *):\n pass') + module = works_in_py.parse('def test(arg, *):\n pass') if module is not None: func = module.children[0] open_, p1, asterisk, close = func._get_param_nodes() diff --git a/test/test_parser.py b/test/test_parser.py index e9a9dda..e087b0d 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -3,7 +3,6 @@ from textwrap import dedent import pytest -from parso._compatibility import u from parso import parse from parso.python import tree from parso.utils import split_lines @@ -110,23 +109,15 @@ def test_param_splitting(each_version): but Jedi does this to simplify argument parsing. """ def check(src, result): - # Python 2 tuple params should be ignored for now. m = parse(src, version=each_version) - if each_version.startswith('2'): - # We don't want b and c to be a part of the param enumeration. Just - # ignore them, because it's not what we want to support in the - # future. - func = next(m.iter_funcdefs()) - assert [param.name.value for param in func.get_params()] == result - else: - assert not list(m.iter_funcdefs()) + assert not list(m.iter_funcdefs()) check('def x(a, (b, c)):\n pass', ['a']) check('def x((b, c)):\n pass', []) def test_unicode_string(): - s = tree.String(None, u('bö'), (0, 0)) + s = tree.String(None, 'bö', (0, 0)) assert repr(s) # Should not raise an Error! @@ -135,19 +126,10 @@ def test_backslash_dos_style(each_version): def test_started_lambda_stmt(each_version): - m = parse(u'lambda a, b: a i', version=each_version) + m = parse('lambda a, b: a i', version=each_version) assert m.children[0].type == 'error_node' -def test_python2_octal(each_version): - module = parse('0660', version=each_version) - first = module.children[0] - if each_version.startswith('2'): - assert first.type == 'number' - else: - assert first.type == 'error_node' - - @pytest.mark.parametrize('code', ['foo "', 'foo """\n', 'foo """\nbar']) def test_open_string_literal(each_version, code): """ @@ -194,10 +176,12 @@ def test_no_error_nodes(each_version): def test_named_expression(works_ge_py38): works_ge_py38.parse("(a := 1, a + 1)") + def test_extended_rhs_annassign(works_ge_py38): works_ge_py38.parse("x: y = z,") works_ge_py38.parse("x: Tuple[int, ...] 
= z, *q, w") + @pytest.mark.parametrize( 'param_code', [ 'a=1, /', @@ -212,6 +196,7 @@ def test_extended_rhs_annassign(works_ge_py38): def test_positional_only_arguments(works_ge_py38, param_code): works_ge_py38.parse("def x(%s): pass" % param_code) + @pytest.mark.parametrize( 'expression', [ 'a + a', diff --git a/test/test_parser_tree.py b/test/test_parser_tree.py index 7408479..0201f7c 100644 --- a/test/test_parser_tree.py +++ b/test/test_parser_tree.py @@ -8,7 +8,7 @@ from parso import parse from parso.python import tree -class TestsFunctionAndLambdaParsing(object): +class TestsFunctionAndLambdaParsing: FIXTURES = [ ('def my_function(x, y, z) -> str:\n return x + y * z\n', { @@ -26,7 +26,7 @@ class TestsFunctionAndLambdaParsing(object): @pytest.fixture(params=FIXTURES) def node(self, request): - parsed = parse(dedent(request.param[0]), version='3.5') + parsed = parse(dedent(request.param[0]), version='3.10') request.keywords['expected'] = request.param[1] child = parsed.children[0] if child.type == 'simple_stmt': @@ -79,16 +79,16 @@ def test_default_param(each_version): assert not param.star_count -def test_annotation_param(each_py3_version): - func = parse('def x(foo: 3): pass', version=each_py3_version).children[0] +def test_annotation_param(each_version): + func = parse('def x(foo: 3): pass', version=each_version).children[0] param, = func.get_params() assert param.default is None assert param.annotation.value == '3' assert not param.star_count -def test_annotation_params(each_py3_version): - func = parse('def x(foo: 3, bar: 4): pass', version=each_py3_version).children[0] +def test_annotation_params(each_version): + func = parse('def x(foo: 3, bar: 4): pass', version=each_version).children[0] param1, param2 = func.get_params() assert param1.default is None @@ -100,23 +100,14 @@ def test_annotation_params(each_py3_version): assert not param2.star_count -def test_default_and_annotation_param(each_py3_version): - func = parse('def x(foo:3=42): pass', version=each_py3_version).children[0] +def test_default_and_annotation_param(each_version): + func = parse('def x(foo:3=42): pass', version=each_version).children[0] param, = func.get_params() assert param.default.value == '42' assert param.annotation.value == '3' assert not param.star_count -def test_ellipsis_py2(each_py2_version): - module = parse('[0][...]', version=each_py2_version, error_recovery=False) - expr = module.children[0] - trailer = expr.children[-1] - subscript = trailer.children[1] - assert subscript.type == 'subscript' - assert [leaf.value for leaf in subscript.children] == ['.', '.', '.'] - - def get_yield_exprs(code, version): return list(parse(code, version=version).children[0].iter_yield_exprs()) @@ -172,13 +163,13 @@ def top_function_three(): raise Exception """ - r = get_raise_stmts(code, 0) # Lists in a simple Function + r = get_raise_stmts(code, 0) # Lists in a simple Function assert len(list(r)) == 1 - r = get_raise_stmts(code, 1) # Doesn't Exceptions list in closures + r = get_raise_stmts(code, 1) # Doesn't Exceptions list in closures assert len(list(r)) == 1 - r = get_raise_stmts(code, 2) # Lists inside try-catch + r = get_raise_stmts(code, 2) # Lists inside try-catch assert len(list(r)) == 2 diff --git a/test/test_pep8.py b/test/test_pep8.py index 44c11f4..43ae767 100644 --- a/test/test_pep8.py +++ b/test/test_pep8.py @@ -33,6 +33,7 @@ def test_eof_blankline(): assert_issue('# foobar\n\n') assert_issue('\n\n') + def test_shebang(): assert not issues('#!\n') assert not issues('#!/foo\n') diff --git 
a/test/test_pgen2.py b/test/test_pgen2.py index 158ec29..9b0dd34 100644 --- a/test/test_pgen2.py +++ b/test/test_pgen2.py @@ -1,11 +1,3 @@ -"""Test suite for 2to3's parser and grammar files. - -This is the place to add tests for changes to 2to3's grammar, such as those -merging the grammars for Python 2 and 3. In addition to specific tests for -parts of the grammar we've changed, we also make sure we can parse the -test_grammar.py files from both Python 2 and Python 3. -""" - from textwrap import dedent import pytest @@ -30,35 +22,35 @@ def _invalid_syntax(code, version=None, **kwargs): def test_formfeed(each_version): - s = u"foo\n\x0c\nfoo\n" + s = "foo\n\x0c\nfoo\n" t = _parse(s, each_version) assert t.children[0].children[0].type == 'name' assert t.children[1].children[0].type == 'name' - s = u"1\n\x0c\x0c\n2\n" + s = "1\n\x0c\x0c\n2\n" t = _parse(s, each_version) with pytest.raises(ParserSyntaxError): - s = u"\n\x0c2\n" + s = "\n\x0c2\n" _parse(s, each_version) -def test_matrix_multiplication_operator(works_ge_py35): - works_ge_py35.parse("a @ b") - works_ge_py35.parse("a @= b") +def test_matrix_multiplication_operator(works_in_py): + works_in_py.parse("a @ b") + works_in_py.parse("a @= b") -def test_yield_from(works_ge_py3, each_version): - works_ge_py3.parse("yield from x") - works_ge_py3.parse("(yield from x) + y") +def test_yield_from(works_in_py, each_version): + works_in_py.parse("yield from x") + works_in_py.parse("(yield from x) + y") _invalid_syntax("yield from", each_version) -def test_await_expr(works_ge_py35): - works_ge_py35.parse("""async def foo(): +def test_await_expr(works_in_py): + works_in_py.parse("""async def foo(): await x """) - works_ge_py35.parse("""async def foo(): + works_in_py.parse("""async def foo(): def foo(): pass @@ -67,24 +59,27 @@ def test_await_expr(works_ge_py35): await x """) - works_ge_py35.parse("""async def foo(): return await a""") + works_in_py.parse("""async def foo(): return await a""") - works_ge_py35.parse("""def foo(): + works_in_py.parse("""def foo(): def foo(): pass async def foo(): await x """) -@pytest.mark.skipif('sys.version_info[:2] < (3, 5)') -@pytest.mark.xfail(reason="acting like python 3.7") -def test_async_var(): - _parse("""async = 1""", "3.5") - _parse("""await = 1""", "3.5") - _parse("""def async(): pass""", "3.5") +@pytest.mark.parametrize( + 'code', [ + "async = 1", + "await = 1", + "def async(): pass", + ] +) +def test_async_var(works_not_in_py, code): + works_not_in_py.parse(code) -def test_async_for(works_ge_py35): - works_ge_py35.parse("async def foo():\n async for a in b: pass") +def test_async_for(works_in_py): + works_in_py.parse("async def foo():\n async for a in b: pass") @pytest.mark.parametrize("body", [ @@ -114,77 +109,89 @@ def test_async_for(works_ge_py35): 1 async for a in b ]""", ]) -def test_async_for_comprehension_newline(works_ge_py36, body): +def test_async_for_comprehension_newline(works_in_py, body): # Issue #139 - works_ge_py36.parse("""async def foo(): + works_in_py.parse("""async def foo(): {}""".format(body)) -def test_async_with(works_ge_py35): - works_ge_py35.parse("async def foo():\n async with a: pass") +def test_async_with(works_in_py): + works_in_py.parse("async def foo():\n async with a: pass") - @pytest.mark.skipif('sys.version_info[:2] < (3, 5)') - @pytest.mark.xfail(reason="acting like python 3.7") - def test_async_with_invalid(): - _invalid_syntax("""def foo(): - async with a: pass""", version="3.5") + +def test_async_with_invalid(works_in_py): + works_in_py.parse("""def foo():\n 
async with a: pass""") def test_raise_3x_style_1(each_version): _parse("raise", each_version) -def test_raise_2x_style_2(works_in_py2): - works_in_py2.parse("raise E, V") +def test_raise_2x_style_2(works_not_in_py): + works_not_in_py.parse("raise E, V") + + +def test_raise_2x_style_3(works_not_in_py): + works_not_in_py.parse("raise E, V, T") -def test_raise_2x_style_3(works_in_py2): - works_in_py2.parse("raise E, V, T") def test_raise_2x_style_invalid_1(each_version): _invalid_syntax("raise E, V, T, Z", version=each_version) -def test_raise_3x_style(works_ge_py3): - works_ge_py3.parse("raise E1 from E2") + +def test_raise_3x_style(works_in_py): + works_in_py.parse("raise E1 from E2") + def test_raise_3x_style_invalid_1(each_version): _invalid_syntax("raise E, V from E1", each_version) + def test_raise_3x_style_invalid_2(each_version): _invalid_syntax("raise E from E1, E2", each_version) + def test_raise_3x_style_invalid_3(each_version): _invalid_syntax("raise from E1, E2", each_version) + def test_raise_3x_style_invalid_4(each_version): _invalid_syntax("raise E from", each_version) # Adapted from Python 3's Lib/test/test_grammar.py:GrammarTests.testFuncdef -def test_annotation_1(works_ge_py3): - works_ge_py3.parse("""def f(x) -> list: pass""") +def test_annotation_1(works_in_py): + works_in_py.parse("""def f(x) -> list: pass""") -def test_annotation_2(works_ge_py3): - works_ge_py3.parse("""def f(x:int): pass""") -def test_annotation_3(works_ge_py3): - works_ge_py3.parse("""def f(*x:str): pass""") +def test_annotation_2(works_in_py): + works_in_py.parse("""def f(x:int): pass""") -def test_annotation_4(works_ge_py3): - works_ge_py3.parse("""def f(**x:float): pass""") -def test_annotation_5(works_ge_py3): - works_ge_py3.parse("""def f(x, y:1+2): pass""") +def test_annotation_3(works_in_py): + works_in_py.parse("""def f(*x:str): pass""") -def test_annotation_6(each_py3_version): - _invalid_syntax("""def f(a, (b:1, c:2, d)): pass""", each_py3_version) -def test_annotation_7(each_py3_version): - _invalid_syntax("""def f(a, (b:1, c:2, d), e:3=4, f=5, *g:6): pass""", each_py3_version) +def test_annotation_4(works_in_py): + works_in_py.parse("""def f(**x:float): pass""") -def test_annotation_8(each_py3_version): + +def test_annotation_5(works_in_py): + works_in_py.parse("""def f(x, y:1+2): pass""") + + +def test_annotation_6(each_version): + _invalid_syntax("""def f(a, (b:1, c:2, d)): pass""", each_version) + + +def test_annotation_7(each_version): + _invalid_syntax("""def f(a, (b:1, c:2, d), e:3=4, f=5, *g:6): pass""", each_version) + + +def test_annotation_8(each_version): s = """def f(a, (b:1, c:2, d), e:3=4, f=5, *g:6, h:7, i=8, j:9=10, **k:11) -> 12: pass""" - _invalid_syntax(s, each_py3_version) + _invalid_syntax(s, each_version) def test_except_new(each_version): @@ -195,27 +202,31 @@ def test_except_new(each_version): y""") _parse(s, each_version) -def test_except_old(works_in_py2): + +def test_except_old(works_not_in_py): s = dedent(""" try: x except E, N: y""") - works_in_py2.parse(s) + works_not_in_py.parse(s) # Adapted from Python 3's Lib/test/test_grammar.py:GrammarTests.testAtoms -def test_set_literal_1(works_ge_py27): - works_ge_py27.parse("""x = {'one'}""") +def test_set_literal_1(works_in_py): + works_in_py.parse("""x = {'one'}""") -def test_set_literal_2(works_ge_py27): - works_ge_py27.parse("""x = {'one', 1,}""") -def test_set_literal_3(works_ge_py27): - works_ge_py27.parse("""x = {'one', 'two', 'three'}""") +def test_set_literal_2(works_in_py): + works_in_py.parse("""x = {'one', 
1,}""") -def test_set_literal_4(works_ge_py27): - works_ge_py27.parse("""x = {2, 3, 4,}""") + +def test_set_literal_3(works_in_py): + works_in_py.parse("""x = {'one', 'two', 'three'}""") + + +def test_set_literal_4(works_in_py): + works_in_py.parse("""x = {2, 3, 4,}""") def test_new_octal_notation(each_version): @@ -223,21 +234,21 @@ def test_new_octal_notation(each_version): _invalid_syntax("""0o7324528887""", each_version) -def test_old_octal_notation(works_in_py2): - works_in_py2.parse("07") +def test_old_octal_notation(works_not_in_py): + works_not_in_py.parse("07") -def test_long_notation(works_in_py2): - works_in_py2.parse("0xFl") - works_in_py2.parse("0xFL") - works_in_py2.parse("0b1l") - works_in_py2.parse("0B1L") - works_in_py2.parse("0o7l") - works_in_py2.parse("0O7L") - works_in_py2.parse("0l") - works_in_py2.parse("0L") - works_in_py2.parse("10l") - works_in_py2.parse("10L") +def test_long_notation(works_not_in_py): + works_not_in_py.parse("0xFl") + works_not_in_py.parse("0xFL") + works_not_in_py.parse("0b1l") + works_not_in_py.parse("0B1L") + works_not_in_py.parse("0o7l") + works_not_in_py.parse("0O7L") + works_not_in_py.parse("0l") + works_not_in_py.parse("0L") + works_not_in_py.parse("10l") + works_not_in_py.parse("10L") def test_new_binary_notation(each_version): @@ -245,28 +256,24 @@ def test_new_binary_notation(each_version): _invalid_syntax("""0b0101021""", each_version) -def test_class_new_syntax(works_ge_py3): - works_ge_py3.parse("class B(t=7): pass") - works_ge_py3.parse("class B(t, *args): pass") - works_ge_py3.parse("class B(t, **kwargs): pass") - works_ge_py3.parse("class B(t, *args, **kwargs): pass") - works_ge_py3.parse("class B(t, y=9, *args, **kwargs): pass") +def test_class_new_syntax(works_in_py): + works_in_py.parse("class B(t=7): pass") + works_in_py.parse("class B(t, *args): pass") + works_in_py.parse("class B(t, **kwargs): pass") + works_in_py.parse("class B(t, *args, **kwargs): pass") + works_in_py.parse("class B(t, y=9, *args, **kwargs): pass") -def test_parser_idempotency_extended_unpacking(works_ge_py3): +def test_parser_idempotency_extended_unpacking(works_in_py): """A cut-down version of pytree_idempotency.py.""" - works_ge_py3.parse("a, *b, c = x\n") - works_ge_py3.parse("[*a, b] = x\n") - works_ge_py3.parse("(z, *y, w) = m\n") - works_ge_py3.parse("for *z, m in d: pass\n") + works_in_py.parse("a, *b, c = x\n") + works_in_py.parse("[*a, b] = x\n") + works_in_py.parse("(z, *y, w) = m\n") + works_in_py.parse("for *z, m in d: pass\n") def test_multiline_bytes_literals(each_version): - """ - It's not possible to get the same result when using \xaa in Python 2/3, - because it's treated differently. 
- """ - s = u""" + s = """ md5test(b"\xaa" * 80, (b"Test Using Larger Than Block-Size Key " b"and Larger Than One Block-Size Data"), @@ -285,17 +292,17 @@ def test_multiline_bytes_tripquote_literals(each_version): _parse(s, each_version) -def test_ellipsis(works_ge_py3, each_version): - works_ge_py3.parse("...") +def test_ellipsis(works_in_py, each_version): + works_in_py.parse("...") _parse("[0][...]", version=each_version) -def test_dict_unpacking(works_ge_py35): - works_ge_py35.parse("{**dict(a=3), foo:2}") +def test_dict_unpacking(works_in_py): + works_in_py.parse("{**dict(a=3), foo:2}") def test_multiline_str_literals(each_version): - s = u""" + s = """ md5test("\xaa" * 80, ("Test Using Larger Than Block-Size Key " "and Larger Than One Block-Size Data"), @@ -304,24 +311,24 @@ def test_multiline_str_literals(each_version): _parse(s, each_version) -def test_py2_backticks(works_in_py2): - works_in_py2.parse("`1`") +def test_py2_backticks(works_not_in_py): + works_not_in_py.parse("`1`") -def test_py2_string_prefixes(works_in_py2): - works_in_py2.parse("ur'1'") - works_in_py2.parse("Ur'1'") - works_in_py2.parse("UR'1'") - _invalid_syntax("ru'1'", works_in_py2.version) +def test_py2_string_prefixes(works_not_in_py): + works_not_in_py.parse("ur'1'") + works_not_in_py.parse("Ur'1'") + works_not_in_py.parse("UR'1'") + _invalid_syntax("ru'1'", works_not_in_py.version) def py_br(each_version): _parse('br""', each_version) -def test_py3_rb(works_ge_py3): - works_ge_py3.parse("rb'1'") - works_ge_py3.parse("RB'1'") +def test_py3_rb(works_in_py): + works_in_py.parse("rb'1'") + works_in_py.parse("RB'1'") def test_left_recursion(): @@ -332,7 +339,7 @@ def test_left_recursion(): @pytest.mark.parametrize( 'grammar, error_match', [ ['foo: bar | baz\nbar: NAME\nbaz: NAME\n', - r"foo is ambiguous.*given a TokenType\(NAME\).*bar or baz"], + r"foo is ambiguous.*given a PythonTokenTypes\.NAME.*bar or baz"], ['''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', r"foo is ambiguous.*given a ReservedString\(x\).*bar or baz"], ['''foo: bar | 'x'\nbar: 'x'\n''', diff --git a/test/test_prefix.py b/test/test_prefix.py index 0c79958..a6e254b 100644 --- a/test/test_prefix.py +++ b/test/test_prefix.py @@ -1,9 +1,4 @@ -try: - from itertools import zip_longest -except ImportError: - # Python 2 - from itertools import izip_longest as zip_longest - +from itertools import zip_longest from codecs import BOM_UTF8 import pytest @@ -44,7 +39,7 @@ def test_simple_prefix_splitting(string, tokens): else: end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing) - #assert start_pos == pt.start_pos + # assert start_pos == pt.start_pos assert end_pos == pt.end_pos start_pos = end_pos diff --git a/test/test_python_errors.py b/test/test_python_errors.py index 0b45f11..00d01cc 100644 --- a/test/test_python_errors.py +++ b/test/test_python_errors.py @@ -48,10 +48,7 @@ def test_non_async_in_async(): This example doesn't work with FAILING_EXAMPLES, because the line numbers are not always the same / incorrect in Python 3.8. """ - if sys.version_info[:2] < (3, 5): - pytest.skip() - - # Raises multiple errors in previous versions. + # Raises multiple errors in previous versions. code = 'async def foo():\n def nofoo():[x async for x in []]' wanted, line_nr = _get_actual_exception(code) @@ -120,16 +117,9 @@ def _get_actual_exception(code): assert False, "The piece of code should raise an exception." # SyntaxError - if wanted == 'SyntaxError: non-keyword arg after keyword arg': - # The python 3.5+ way, a bit nicer. 
- wanted = 'SyntaxError: positional argument follows keyword argument' - elif wanted == 'SyntaxError: assignment to keyword': + if wanted == 'SyntaxError: assignment to keyword': return [wanted, "SyntaxError: can't assign to keyword", 'SyntaxError: cannot assign to __debug__'], line_nr - elif wanted == 'SyntaxError: can use starred expression only as assignment target': - # Python 3.4/3.4 have a bit of a different warning than 3.5/3.6 in - # certain places. But in others this error makes sense. - return [wanted, "SyntaxError: can't use starred expression here"], line_nr elif wanted == 'SyntaxError: f-string: unterminated string': wanted = 'SyntaxError: EOL while scanning string literal' elif wanted == 'SyntaxError: f-string expression part cannot include a backslash': @@ -259,10 +249,7 @@ def test_escape_decode_literals(each_version): # Finally bytes. error, = _get_error_list(r'b"\x"', version=each_version) - wanted = r'SyntaxError: (value error) invalid \x escape' - if sys.version_info >= (3, 0): - # The positioning information is only available in Python 3. - wanted += ' at position 0' + wanted = r'SyntaxError: (value error) invalid \x escape at position 0' assert error.message == wanted @@ -273,10 +260,12 @@ def test_too_many_levels_of_indentation(): assert not _get_error_list(build_nested('pass', 49, base=base)) assert _get_error_list(build_nested('pass', 50, base=base)) + def test_paren_kwarg(): assert _get_error_list("print((sep)=seperator)", version="3.8") assert not _get_error_list("print((sep)=seperator)", version="3.7") + @pytest.mark.parametrize( 'code', [ "f'{*args,}'", @@ -330,6 +319,7 @@ def test_trailing_comma(code): errors = _get_error_list(code) assert not errors + def test_continue_in_finally(): code = dedent('''\ for a in [1]: @@ -341,7 +331,7 @@ def test_continue_in_finally(): assert not _get_error_list(code, version="3.8") assert _get_error_list(code, version="3.7") - + @pytest.mark.parametrize( 'template', [ "a, b, {target}, c = d", @@ -392,6 +382,7 @@ def test_repeated_kwarg(): def test_unparenthesized_genexp(source, no_errors): assert bool(_get_error_list(source)) ^ no_errors + @pytest.mark.parametrize( ('source', 'no_errors'), [ ('*x = 2', False), diff --git a/test/test_tokenize.py b/test/test_tokenize.py index 7afa373..0029fc8 100644 --- a/test/test_tokenize.py +++ b/test/test_tokenize.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 # This file contains Unicode characters. -import sys from textwrap import dedent import pytest @@ -31,7 +30,7 @@ FSTRING_END = PythonTokenTypes.FSTRING_END def _get_token_list(string, version=None): # Load the current version. 
version_info = parse_version_string(version) - return list(tokenize.tokenize(string, version_info)) + return list(tokenize.tokenize(string, version_info=version_info)) def test_end_pos_one_line(): @@ -108,7 +107,7 @@ def test_tokenize_multiline_I(): fundef = '''""""\n''' token_list = _get_token_list(fundef) assert token_list == [PythonToken(ERRORTOKEN, '""""\n', (1, 0), ''), - PythonToken(ENDMARKER , '', (2, 0), '')] + PythonToken(ENDMARKER, '', (2, 0), '')] def test_tokenize_multiline_II(): @@ -117,7 +116,7 @@ def test_tokenize_multiline_II(): fundef = '''""""''' token_list = _get_token_list(fundef) assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''), - PythonToken(ENDMARKER, '', (1, 4), '')] + PythonToken(ENDMARKER, '', (1, 4), '')] def test_tokenize_multiline_III(): @@ -126,7 +125,7 @@ def test_tokenize_multiline_III(): fundef = '''""""\n\n''' token_list = _get_token_list(fundef) assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''), - PythonToken(ENDMARKER, '', (3, 0), '')] + PythonToken(ENDMARKER, '', (3, 0), '')] def test_identifier_contains_unicode(): @@ -136,12 +135,7 @@ def test_identifier_contains_unicode(): ''') token_list = _get_token_list(fundef) unicode_token = token_list[1] - if sys.version_info.major >= 3: - assert unicode_token[0] == NAME - else: - # Unicode tokens in Python 2 seem to be identified as operators. - # They will be ignored in the parser, that's ok. - assert unicode_token[0] == ERRORTOKEN + assert unicode_token[0] == NAME def test_quoted_strings(): @@ -184,19 +178,16 @@ def test_ur_literals(): assert typ == NAME check('u""') - check('ur""', is_literal=not sys.version_info.major >= 3) - check('Ur""', is_literal=not sys.version_info.major >= 3) - check('UR""', is_literal=not sys.version_info.major >= 3) + check('ur""', is_literal=False) + check('Ur""', is_literal=False) + check('UR""', is_literal=False) check('bR""') - # Starting with Python 3.3 this ordering is also possible. - if sys.version_info.major >= 3: - check('Rb""') + check('Rb""') - # Starting with Python 3.6 format strings where introduced. 
- check('fr""', is_literal=sys.version_info >= (3, 6)) - check('rF""', is_literal=sys.version_info >= (3, 6)) - check('f""', is_literal=sys.version_info >= (3, 6)) - check('F""', is_literal=sys.version_info >= (3, 6)) + check('fr""') + check('rF""') + check('f""') + check('F""') def test_error_literal(): @@ -229,9 +220,6 @@ def test_endmarker_end_pos(): check('a\\') -xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Python 2')]) - - @pytest.mark.parametrize( ('code', 'types'), [ # Indentation @@ -243,12 +231,10 @@ xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Pyth # Name stuff ('1foo1', [NUMBER, NAME]), - pytest.param( - u'மெல்லினம்', [NAME], - **xfail_py2), - pytest.param(u'²', [ERRORTOKEN], **xfail_py2), - pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2), - pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2), + ('மெல்லினம்', [NAME]), + ('²', [ERRORTOKEN]), + ('ä²ö', [NAME, ERRORTOKEN, NAME]), + ('ää²¹öö', [NAME, ERRORTOKEN, NAME]), (' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]), (dedent('''\ class BaseCache: @@ -411,8 +397,8 @@ def test_backslash(): ]), ] ) -def test_fstring_token_types(code, types, version_ge_py36): - actual_types = [t.type for t in _get_token_list(code, version_ge_py36)] +def test_fstring_token_types(code, types, each_version): + actual_types = [t.type for t in _get_token_list(code, each_version)] assert types + [ENDMARKER] == actual_types diff --git a/test/test_utils.py b/test/test_utils.py index 541d81f..06fbe79 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -83,6 +83,7 @@ def test_bytes_to_unicode_failing_encoding(code, errors): else: python_bytes_to_unicode(code, errors=errors) + @pytest.mark.parametrize( ('version_str', 'version'), [ ('3', (3,)), diff --git a/tox.ini b/tox.ini deleted file mode 100644 index c0f7675..0000000 --- a/tox.ini +++ /dev/null @@ -1,15 +0,0 @@ -[tox] -envlist = {py27,py34,py35,py36,py37,py38} -[testenv] -extras = testing -deps = - py27,py34: pytest<3.3 - coverage: coverage -setenv = -# https://github.com/tomchristie/django-rest-framework/issues/1957 -# tox corrupts __pycache__, solution from here: - PYTHONDONTWRITEBYTECODE=1 - coverage: TOX_TESTENV_COMMAND=coverage run -m pytest -commands = - {env:TOX_TESTENV_COMMAND:pytest} {posargs} - coverage: coverage report