mirror of
https://github.com/davidhalter/parso.git
synced 2026-05-25 01:38:52 +08:00
Move grammar stubs into grammar.py
This commit is contained in:
+30
-17
@@ -1,9 +1,12 @@
|
|||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Generic, TypeVar, Union, Dict, Optional, Any
|
||||||
|
|
||||||
from parso._compatibility import is_pypy
|
from parso._compatibility import is_pypy
|
||||||
from parso.pgen2 import generate_grammar
|
from parso.pgen2 import generate_grammar
|
||||||
from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
|
from parso.utils import split_lines, python_bytes_to_unicode, \
|
||||||
|
PythonVersionInfo, parse_version_string
|
||||||
from parso.python.diff import DiffParser
|
from parso.python.diff import DiffParser
|
||||||
from parso.python.tokenize import tokenize_lines, tokenize
|
from parso.python.tokenize import tokenize_lines, tokenize
|
||||||
from parso.python.token import PythonTokenTypes
|
from parso.python.token import PythonTokenTypes
|
||||||
@@ -13,12 +16,14 @@ from parso.python.parser import Parser as PythonParser
|
|||||||
from parso.python.errors import ErrorFinderConfig
|
from parso.python.errors import ErrorFinderConfig
|
||||||
from parso.python import pep8
|
from parso.python import pep8
|
||||||
from parso.file_io import FileIO, KnownContentFileIO
|
from parso.file_io import FileIO, KnownContentFileIO
|
||||||
from parso.normalizer import RefactoringNormalizer
|
from parso.normalizer import RefactoringNormalizer, NormalizerConfig
|
||||||
|
|
||||||
_loaded_grammars = {}
|
_loaded_grammars: Dict[str, 'Grammar'] = {}
|
||||||
|
|
||||||
|
_NodeT = TypeVar("_NodeT")
|
||||||
|
|
||||||
|
|
||||||
class Grammar(object):
|
class Grammar(Generic[_NodeT]):
|
||||||
"""
|
"""
|
||||||
:py:func:`parso.load_grammar` returns instances of this class.
|
:py:func:`parso.load_grammar` returns instances of this class.
|
||||||
|
|
||||||
@@ -26,11 +31,12 @@ class Grammar(object):
|
|||||||
|
|
||||||
:param text: A BNF representation of your grammar.
|
:param text: A BNF representation of your grammar.
|
||||||
"""
|
"""
|
||||||
_error_normalizer_config = None
|
_start_nonterminal: str
|
||||||
_token_namespace = None
|
_error_normalizer_config: Optional[ErrorFinderConfig] = None
|
||||||
_default_normalizer_config = pep8.PEP8NormalizerConfig()
|
_token_namespace: Any = None
|
||||||
|
_default_normalizer_config: NormalizerConfig = pep8.PEP8NormalizerConfig()
|
||||||
|
|
||||||
def __init__(self, text, *, tokenizer, parser=BaseParser, diff_parser=None):
|
def __init__(self, text: str, *, tokenizer, parser=BaseParser, diff_parser=None):
|
||||||
self._pgen_grammar = generate_grammar(
|
self._pgen_grammar = generate_grammar(
|
||||||
text,
|
text,
|
||||||
token_namespace=self._get_token_namespace()
|
token_namespace=self._get_token_namespace()
|
||||||
@@ -40,9 +46,16 @@ class Grammar(object):
|
|||||||
self._diff_parser = diff_parser
|
self._diff_parser = diff_parser
|
||||||
self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()
|
self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
def parse(self, code=None, *, error_recovery=True, path=None,
|
def parse(self,
|
||||||
start_symbol=None, cache=False, diff_cache=False,
|
code: Union[str, bytes] = None,
|
||||||
cache_path=None, file_io=None):
|
*,
|
||||||
|
error_recovery=True,
|
||||||
|
path: str = None,
|
||||||
|
start_symbol: str = None,
|
||||||
|
cache=False,
|
||||||
|
diff_cache=False,
|
||||||
|
cache_path: str = None,
|
||||||
|
file_io: FileIO = None) -> _NodeT:
|
||||||
"""
|
"""
|
||||||
If you want to parse a Python file you want to start here, most likely.
|
If you want to parse a Python file you want to start here, most likely.
|
||||||
|
|
||||||
@@ -95,7 +108,7 @@ class Grammar(object):
|
|||||||
if cache and file_io.path is not None:
|
if cache and file_io.path is not None:
|
||||||
module_node = load_module(self._hashed, file_io, cache_path=cache_path)
|
module_node = load_module(self._hashed, file_io, cache_path=cache_path)
|
||||||
if module_node is not None:
|
if module_node is not None:
|
||||||
return module_node
|
return module_node # type: ignore
|
||||||
|
|
||||||
if code is None:
|
if code is None:
|
||||||
code = file_io.read()
|
code = file_io.read()
|
||||||
@@ -114,7 +127,7 @@ class Grammar(object):
|
|||||||
module_node = module_cache_item.node
|
module_node = module_cache_item.node
|
||||||
old_lines = module_cache_item.lines
|
old_lines = module_cache_item.lines
|
||||||
if old_lines == lines:
|
if old_lines == lines:
|
||||||
return module_node
|
return module_node # type: ignore
|
||||||
|
|
||||||
new_node = self._diff_parser(
|
new_node = self._diff_parser(
|
||||||
self._pgen_grammar, self._tokenizer, module_node
|
self._pgen_grammar, self._tokenizer, module_node
|
||||||
@@ -126,7 +139,7 @@ class Grammar(object):
|
|||||||
# Never pickle in pypy, it's slow as hell.
|
# Never pickle in pypy, it's slow as hell.
|
||||||
pickling=cache and not is_pypy,
|
pickling=cache and not is_pypy,
|
||||||
cache_path=cache_path)
|
cache_path=cache_path)
|
||||||
return new_node
|
return new_node # type: ignore
|
||||||
|
|
||||||
tokens = self._tokenizer(lines)
|
tokens = self._tokenizer(lines)
|
||||||
|
|
||||||
@@ -142,7 +155,7 @@ class Grammar(object):
|
|||||||
# Never pickle in pypy, it's slow as hell.
|
# Never pickle in pypy, it's slow as hell.
|
||||||
pickling=cache and not is_pypy,
|
pickling=cache and not is_pypy,
|
||||||
cache_path=cache_path)
|
cache_path=cache_path)
|
||||||
return root_node
|
return root_node # type: ignore
|
||||||
|
|
||||||
def _get_token_namespace(self):
|
def _get_token_namespace(self):
|
||||||
ns = self._token_namespace
|
ns = self._token_namespace
|
||||||
@@ -196,7 +209,7 @@ class PythonGrammar(Grammar):
|
|||||||
_token_namespace = PythonTokenTypes
|
_token_namespace = PythonTokenTypes
|
||||||
_start_nonterminal = 'file_input'
|
_start_nonterminal = 'file_input'
|
||||||
|
|
||||||
def __init__(self, version_info, bnf_text):
|
def __init__(self, version_info: PythonVersionInfo, bnf_text: str):
|
||||||
super(PythonGrammar, self).__init__(
|
super(PythonGrammar, self).__init__(
|
||||||
bnf_text,
|
bnf_text,
|
||||||
tokenizer=self._tokenize_lines,
|
tokenizer=self._tokenize_lines,
|
||||||
@@ -213,7 +226,7 @@ class PythonGrammar(Grammar):
|
|||||||
return tokenize(code, version_info=self.version_info)
|
return tokenize(code, version_info=self.version_info)
|
||||||
|
|
||||||
|
|
||||||
def load_grammar(*, language='python', version=None, path=None):
|
def load_grammar(*, language: str = 'python', version: str = None, path: str = None):
|
||||||
"""
|
"""
|
||||||
Loads a :py:class:`parso.Grammar`. The default version is the current Python
|
Loads a :py:class:`parso.Grammar`. The default version is the current Python
|
||||||
version.
|
version.
|
||||||
|
|||||||
@@ -1,38 +0,0 @@
|
|||||||
from typing import Any, Callable, Generic, Optional, Sequence, TypeVar, Union
|
|
||||||
from typing_extensions import Literal
|
|
||||||
|
|
||||||
from parso.utils import PythonVersionInfo
|
|
||||||
|
|
||||||
_Token = Any
|
|
||||||
_NodeT = TypeVar("_NodeT")
|
|
||||||
|
|
||||||
class Grammar(Generic[_NodeT]):
|
|
||||||
_default_normalizer_config: Optional[Any] = ...
|
|
||||||
_error_normalizer_config: Optional[Any] = None
|
|
||||||
_start_nonterminal: str = ...
|
|
||||||
_token_namespace: Optional[str] = None
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
text: str,
|
|
||||||
tokenizer: Callable[[Sequence[str], int], Sequence[_Token]],
|
|
||||||
parser: Any = ...,
|
|
||||||
diff_parser: Any = ...,
|
|
||||||
) -> None: ...
|
|
||||||
def parse(
|
|
||||||
self,
|
|
||||||
code: Union[str, bytes] = ...,
|
|
||||||
error_recovery: bool = ...,
|
|
||||||
path: Optional[str] = ...,
|
|
||||||
start_symbol: Optional[str] = ...,
|
|
||||||
cache: bool = ...,
|
|
||||||
diff_cache: bool = ...,
|
|
||||||
cache_path: Optional[str] = ...,
|
|
||||||
) -> _NodeT: ...
|
|
||||||
|
|
||||||
class PythonGrammar(Grammar):
|
|
||||||
version_info: PythonVersionInfo
|
|
||||||
def __init__(self, version_info: PythonVersionInfo, bnf_text: str) -> None: ...
|
|
||||||
|
|
||||||
def load_grammar(
|
|
||||||
language: Literal["python"] = "python", version: Optional[str] = ..., path: str = ...
|
|
||||||
) -> Grammar: ...
|
|
||||||
+1
-1
@@ -23,7 +23,7 @@ within the statement. This lowers memory usage and cpu time and reduces the
|
|||||||
complexity of the ``Parser`` (there's another parser sitting inside
|
complexity of the ``Parser`` (there's another parser sitting inside
|
||||||
``Statement``, which produces ``Array`` and ``Call``).
|
``Statement``, which produces ``Array`` and ``Call``).
|
||||||
"""
|
"""
|
||||||
from typing import Dict
|
from typing import Dict, Generic, TypeVar
|
||||||
|
|
||||||
from parso import tree
|
from parso import tree
|
||||||
from parso.pgen2.generator import ReservedString
|
from parso.pgen2.generator import ReservedString
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from codecs import BOM_UTF8
|
|||||||
from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \
|
from typing import NamedTuple, Tuple, Iterator, Iterable, List, Dict, \
|
||||||
Pattern, Set
|
Pattern, Set
|
||||||
|
|
||||||
from parso.python.token import PythonTokenTypes, TokenType
|
from parso.python.token import PythonTokenTypes
|
||||||
from parso.utils import split_lines, PythonVersionInfo, parse_version_string
|
from parso.utils import split_lines, PythonVersionInfo, parse_version_string
|
||||||
|
|
||||||
|
|
||||||
@@ -226,7 +226,7 @@ def _create_token_collection(version_info):
|
|||||||
|
|
||||||
|
|
||||||
class Token(NamedTuple):
|
class Token(NamedTuple):
|
||||||
type: TokenType
|
type: PythonTokenTypes
|
||||||
string: str
|
string: str
|
||||||
start_pos: Tuple[int, int]
|
start_pos: Tuple[int, int]
|
||||||
prefix: str
|
prefix: str
|
||||||
|
|||||||
+25
-26
@@ -106,9 +106,6 @@ def python_bytes_to_unicode(
|
|||||||
return source
|
return source
|
||||||
|
|
||||||
encoding = detect_encoding()
|
encoding = detect_encoding()
|
||||||
if not isinstance(encoding, str):
|
|
||||||
encoding = str(encoding, 'utf-8', 'replace')
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Cast to unicode
|
# Cast to unicode
|
||||||
return str(source, encoding, errors)
|
return str(source, encoding, errors)
|
||||||
@@ -132,32 +129,13 @@ def version_info() -> Version:
|
|||||||
return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)])
|
return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)])
|
||||||
|
|
||||||
|
|
||||||
def _parse_version(version):
|
class _PythonVersionInfo(NamedTuple):
|
||||||
match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)
|
|
||||||
if match is None:
|
|
||||||
raise ValueError('The given version is not in the right format. '
|
|
||||||
'Use something like "3.8" or "3".')
|
|
||||||
|
|
||||||
major = int(match.group(1))
|
|
||||||
minor = match.group(2)
|
|
||||||
if minor is None:
|
|
||||||
# Use the latest Python in case it's not exactly defined, because the
|
|
||||||
# grammars are typically backwards compatible?
|
|
||||||
if major == 2:
|
|
||||||
minor = "7"
|
|
||||||
elif major == 3:
|
|
||||||
minor = "6"
|
|
||||||
else:
|
|
||||||
raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
|
|
||||||
minor = int(minor)
|
|
||||||
return PythonVersionInfo(major, minor)
|
|
||||||
|
|
||||||
|
|
||||||
@total_ordering
|
|
||||||
class PythonVersionInfo(NamedTuple):
|
|
||||||
major: int
|
major: int
|
||||||
minor: int
|
minor: int
|
||||||
|
|
||||||
|
|
||||||
|
@total_ordering
|
||||||
|
class PythonVersionInfo(_PythonVersionInfo):
|
||||||
def __gt__(self, other):
|
def __gt__(self, other):
|
||||||
if isinstance(other, tuple):
|
if isinstance(other, tuple):
|
||||||
if len(other) != 2:
|
if len(other) != 2:
|
||||||
@@ -178,6 +156,27 @@ class PythonVersionInfo(NamedTuple):
|
|||||||
return not self.__eq__(other)
|
return not self.__eq__(other)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_version(version) -> PythonVersionInfo:
|
||||||
|
match = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)
|
||||||
|
if match is None:
|
||||||
|
raise ValueError('The given version is not in the right format. '
|
||||||
|
'Use something like "3.8" or "3".')
|
||||||
|
|
||||||
|
major = int(match.group(1))
|
||||||
|
minor = match.group(2)
|
||||||
|
if minor is None:
|
||||||
|
# Use the latest Python in case it's not exactly defined, because the
|
||||||
|
# grammars are typically backwards compatible?
|
||||||
|
if major == 2:
|
||||||
|
minor = "7"
|
||||||
|
elif major == 3:
|
||||||
|
minor = "6"
|
||||||
|
else:
|
||||||
|
raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
|
||||||
|
minor = int(minor)
|
||||||
|
return PythonVersionInfo(major, minor)
|
||||||
|
|
||||||
|
|
||||||
def parse_version_string(version: str = None) -> PythonVersionInfo:
|
def parse_version_string(version: str = None) -> PythonVersionInfo:
|
||||||
"""
|
"""
|
||||||
Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and
|
Checks for a valid version number (e.g. `3.8` or `3.10.1` or `3`) and
|
||||||
|
|||||||
Reference in New Issue
Block a user