diff --git a/parso/grammar.py b/parso/grammar.py index 0324740..2c7285a 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -1,9 +1,12 @@ import hashlib import os +from enum import Enum +from typing import Generic, TypeVar, Union, Dict, Optional, Any from parso._compatibility import is_pypy from parso.pgen2 import generate_grammar -from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string +from parso.utils import split_lines, python_bytes_to_unicode, \ + PythonVersionInfo, parse_version_string from parso.python.diff import DiffParser from parso.python.tokenize import tokenize_lines, tokenize from parso.python.token import PythonTokenTypes @@ -13,12 +16,14 @@ from parso.python.parser import Parser as PythonParser from parso.python.errors import ErrorFinderConfig from parso.python import pep8 from parso.file_io import FileIO, KnownContentFileIO -from parso.normalizer import RefactoringNormalizer +from parso.normalizer import RefactoringNormalizer, NormalizerConfig -_loaded_grammars = {} +_loaded_grammars: Dict[str, 'Grammar'] = {} + +_NodeT = TypeVar("_NodeT") -class Grammar(object): +class Grammar(Generic[_NodeT]): """ :py:func:`parso.load_grammar` returns instances of this class. @@ -26,11 +31,12 @@ class Grammar(object): :param text: A BNF representation of your grammar. """ - _error_normalizer_config = None - _token_namespace = None - _default_normalizer_config = pep8.PEP8NormalizerConfig() + _start_nonterminal: str + _error_normalizer_config: Optional[ErrorFinderConfig] = None + _token_namespace: Any = None + _default_normalizer_config: NormalizerConfig = pep8.PEP8NormalizerConfig() - def __init__(self, text, *, tokenizer, parser=BaseParser, diff_parser=None): + def __init__(self, text: str, *, tokenizer, parser=BaseParser, diff_parser=None): self._pgen_grammar = generate_grammar( text, token_namespace=self._get_token_namespace() @@ -40,9 +46,16 @@ class Grammar(object): self._diff_parser = diff_parser self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest() - def parse(self, code=None, *, error_recovery=True, path=None, - start_symbol=None, cache=False, diff_cache=False, - cache_path=None, file_io=None): + def parse(self, + code: Union[str, bytes] = None, + *, + error_recovery=True, + path: str = None, + start_symbol: str = None, + cache=False, + diff_cache=False, + cache_path: str = None, + file_io: FileIO = None) -> _NodeT: """ If you want to parse a Python file you want to start here, most likely. @@ -95,7 +108,7 @@ class Grammar(object): if cache and file_io.path is not None: module_node = load_module(self._hashed, file_io, cache_path=cache_path) if module_node is not None: - return module_node + return module_node # type: ignore if code is None: code = file_io.read() @@ -114,7 +127,7 @@ class Grammar(object): module_node = module_cache_item.node old_lines = module_cache_item.lines if old_lines == lines: - return module_node + return module_node # type: ignore new_node = self._diff_parser( self._pgen_grammar, self._tokenizer, module_node @@ -126,7 +139,7 @@ class Grammar(object): # Never pickle in pypy, it's slow as hell. pickling=cache and not is_pypy, cache_path=cache_path) - return new_node + return new_node # type: ignore tokens = self._tokenizer(lines) @@ -142,7 +155,7 @@ class Grammar(object): # Never pickle in pypy, it's slow as hell. pickling=cache and not is_pypy, cache_path=cache_path) - return root_node + return root_node # type: ignore def _get_token_namespace(self): ns = self._token_namespace @@ -196,7 +209,7 @@ class PythonGrammar(Grammar): _token_namespace = PythonTokenTypes _start_nonterminal = 'file_input' - def __init__(self, version_info, bnf_text): + def __init__(self, version_info: PythonVersionInfo, bnf_text: str): super(PythonGrammar, self).__init__( bnf_text, tokenizer=self._tokenize_lines, @@ -213,7 +226,7 @@ class PythonGrammar(Grammar): return tokenize(code, version_info=self.version_info) -def load_grammar(*, language='python', version=None, path=None): +def load_grammar(*, language: str = 'python', version: str = None, path: str = None): """ Loads a :py:class:`parso.Grammar`. The default version is the current Python version. diff --git a/parso/grammar.pyi b/parso/grammar.pyi deleted file mode 100644 index e5cd2ea..0000000 --- a/parso/grammar.pyi +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Any, Callable, Generic, Optional, Sequence, TypeVar, Union -from typing_extensions import Literal - -from parso.utils import PythonVersionInfo - -_Token = Any -_NodeT = TypeVar("_NodeT") - -class Grammar(Generic[_NodeT]): - _default_normalizer_config: Optional[Any] = ... - _error_normalizer_config: Optional[Any] = None - _start_nonterminal: str = ... - _token_namespace: Optional[str] = None - def __init__( - self, - text: str, - tokenizer: Callable[[Sequence[str], int], Sequence[_Token]], - parser: Any = ..., - diff_parser: Any = ..., - ) -> None: ... - def parse( - self, - code: Union[str, bytes] = ..., - error_recovery: bool = ..., - path: Optional[str] = ..., - start_symbol: Optional[str] = ..., - cache: bool = ..., - diff_cache: bool = ..., - cache_path: Optional[str] = ..., - ) -> _NodeT: ... - -class PythonGrammar(Grammar): - version_info: PythonVersionInfo - def __init__(self, version_info: PythonVersionInfo, bnf_text: str) -> None: ... - -def load_grammar( - language: Literal["python"] = "python", version: Optional[str] = ..., path: str = ... -) -> Grammar: ... diff --git a/parso/parser.py b/parso/parser.py index a7cc6fa..ab7d254 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -23,7 +23,7 @@ within the statement. This lowers memory usage and cpu time and reduces the complexity of the ``Parser`` (there's another parser sitting inside ``Statement``, which produces ``Array`` and ``Call``). """ -from typing import Dict +from typing import Dict, Generic, TypeVar from parso import tree from parso.pgen2.generator import ReservedString diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 0d2bd2f..896772e 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -18,7 +18,7 @@ from codecs import BOM_UTF8 from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \ Pattern, Set -from parso.python.token import PythonTokenTypes, TokenType +from parso.python.token import PythonTokenTypes from parso.utils import split_lines, PythonVersionInfo, parse_version_string @@ -226,7 +226,7 @@ def _create_token_collection(version_info): class Token(NamedTuple): - type: TokenType + type: PythonTokenTypes string: str start_pos: Tuple[int, int] prefix: str diff --git a/parso/utils.py b/parso/utils.py index bc38e60..5a286af 100644 --- a/parso/utils.py +++ b/parso/utils.py @@ -106,9 +106,6 @@ def python_bytes_to_unicode( return source encoding = detect_encoding() - if not isinstance(encoding, str): - encoding = str(encoding, 'utf-8', 'replace') - try: # Cast to unicode return str(source, encoding, errors) @@ -132,32 +129,13 @@ def version_info() -> Version: return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)]) -def _parse_version(version): - match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version) - if match is None: - raise ValueError('The given version is not in the right format. ' - 'Use something like "3.8" or "3".') - - major = int(match.group(1)) - minor = match.group(2) - if minor is None: - # Use the latest Python in case it's not exactly defined, because the - # grammars are typically backwards compatible? - if major == 2: - minor = "7" - elif major == 3: - minor = "6" - else: - raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.") - minor = int(minor) - return PythonVersionInfo(major, minor) - - -@total_ordering -class PythonVersionInfo(NamedTuple): +class _PythonVersionInfo(NamedTuple): major: int minor: int + +@total_ordering +class PythonVersionInfo(_PythonVersionInfo): def __gt__(self, other): if isinstance(other, tuple): if len(other) != 2: @@ -178,6 +156,27 @@ class PythonVersionInfo(NamedTuple): return not self.__eq__(other) +def _parse_version(version) -> PythonVersionInfo: + match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version) + if match is None: + raise ValueError('The given version is not in the right format. ' + 'Use something like "3.8" or "3".') + + major = int(match.group(1)) + minor = match.group(2) + if minor is None: + # Use the latest Python in case it's not exactly defined, because the + # grammars are typically backwards compatible? + if major == 2: + minor = "7" + elif major == 3: + minor = "6" + else: + raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.") + minor = int(minor) + return PythonVersionInfo(major, minor) + + def parse_version_string(version: str = None) -> PythonVersionInfo: """ Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and