mirror of
https://github.com/davidhalter/parso.git
synced 2025-12-11 07:01:59 +08:00
Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
726ebade70 | ||
|
|
f2c257d7dd | ||
|
|
25941bbfb8 | ||
|
|
29fa0d27fc | ||
|
|
775679d481 | ||
|
|
51a6328096 | ||
|
|
89501e6fa5 | ||
|
|
649d2bebbc | ||
|
|
5fab429163 | ||
|
|
7d3438c94d | ||
|
|
3a0cd6d377 | ||
|
|
4ca92861c6 | ||
|
|
c734df407b | ||
|
|
96252145a7 | ||
|
|
23d1470618 | ||
|
|
973f7c5f61 | ||
|
|
a5b9177d4f |
@@ -3,7 +3,7 @@
|
|||||||
set -eu -o pipefail
|
set -eu -o pipefail
|
||||||
|
|
||||||
# Create tag
|
# Create tag
|
||||||
git tag $(python -c 'import parso; print(parso.__version__)')
|
git tag v$(python -c 'import parso; print(parso.__version__)')
|
||||||
git push --tags
|
git push --tags
|
||||||
|
|
||||||
# Package and upload to PyPI
|
# Package and upload to PyPI
|
||||||
|
|||||||
@@ -1,11 +1,33 @@
|
|||||||
|
"""
|
||||||
|
parso is a Python parser. It's really easy to use and supports multiple Python
|
||||||
|
versions, file caching, round-trips and other stuff:
|
||||||
|
|
||||||
|
>>> from parso import load_python_grammar
|
||||||
|
>>> grammar = load_python_grammar(version='2.7')
|
||||||
|
>>> module = grammar.parse('hello + 1')
|
||||||
|
>>> stmt = module.children[0]
|
||||||
|
>>> stmt
|
||||||
|
PythonNode(simple_stmt, [PythonNode(arith_expr, [...]), <Newline: ''>])
|
||||||
|
>>> stmt.get_code()
|
||||||
|
'hello + 1'
|
||||||
|
>>> name = stmt.children[0].children[0]
|
||||||
|
>>> name
|
||||||
|
<Name: hello@1,0>
|
||||||
|
>>> name.end_pos
|
||||||
|
(1, 5)
|
||||||
|
"""
|
||||||
|
|
||||||
from parso.parser import ParserSyntaxError
|
from parso.parser import ParserSyntaxError
|
||||||
from parso.pgen2.pgen import generate_grammar
|
from parso.grammar import create_grammar, load_python_grammar
|
||||||
from parso import python
|
|
||||||
|
|
||||||
|
|
||||||
__version__ = '0.0.1'
|
def parse(code=None, **kwargs):
|
||||||
|
"""
|
||||||
|
A utility function to parse Python with the current Python version. Params
|
||||||
|
are documented in ``Grammar.parse``.
|
||||||
|
"""
|
||||||
|
grammar = load_python_grammar()
|
||||||
|
return grammar.parse(code, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def parse(grammar, code):
|
__version__ = '0.0.2'
|
||||||
raise NotImplementedError
|
|
||||||
Parser(grammar, code)
|
|
||||||
|
|||||||
@@ -155,7 +155,7 @@ def _get_hashed_path(grammar, path, cache_path=None):
|
|||||||
directory = _get_cache_directory_path(cache_path=cache_path)
|
directory = _get_cache_directory_path(cache_path=cache_path)
|
||||||
|
|
||||||
file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
|
file_hash = hashlib.sha256(path.encode("utf-8")).hexdigest()
|
||||||
return os.path.join(directory, '%s-%s.pkl' % (grammar.sha256, file_hash))
|
return os.path.join(directory, '%s-%s.pkl' % (grammar._sha256, file_hash))
|
||||||
|
|
||||||
|
|
||||||
def _get_cache_directory_path(cache_path=None):
|
def _get_cache_directory_path(cache_path=None):
|
||||||
|
|||||||
163
parso/grammar.py
Normal file
163
parso/grammar.py
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
|
||||||
|
from parso._compatibility import FileNotFoundError
|
||||||
|
from parso.pgen2.pgen import generate_grammar
|
||||||
|
from parso.utils import splitlines, source_to_unicode
|
||||||
|
from parso.python.parser import Parser, remove_last_newline
|
||||||
|
from parso.python.diff import DiffParser
|
||||||
|
from parso.tokenize import generate_tokens
|
||||||
|
from parso.cache import parser_cache, load_module, save_module
|
||||||
|
from parso.parser import BaseParser
|
||||||
|
from parso.python.parser import Parser as PythonParser
|
||||||
|
|
||||||
|
_loaded_grammars = {}
|
||||||
|
|
||||||
|
|
||||||
|
class Grammar(object):
|
||||||
|
def __init__(self, bnf_text, tokenizer, parser, diff_parser=None):
|
||||||
|
self._pgen_grammar = generate_grammar(bnf_text)
|
||||||
|
self._parser = parser
|
||||||
|
self._tokenizer = tokenizer
|
||||||
|
self._diff_parser = diff_parser
|
||||||
|
self._sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
def parse(self, code=None, **kwargs):
|
||||||
|
"""
|
||||||
|
If you want to parse a Python file you want to start here, most likely.
|
||||||
|
|
||||||
|
If you need finer grained control over the parsed instance, there will be
|
||||||
|
other ways to access it.
|
||||||
|
|
||||||
|
:param code str: A unicode string that contains Python code.
|
||||||
|
:param path str: The path to the file you want to open. Only needed for caching.
|
||||||
|
:param error_recovery bool: If enabled, any code will be returned. If
|
||||||
|
it is invalid, it will be returned as an error node. If disabled,
|
||||||
|
you will get a ParseError when encountering syntax errors in your
|
||||||
|
code.
|
||||||
|
:param start_symbol str: The grammar symbol that you want to parse. Only
|
||||||
|
allowed to be used when error_recovery is False.
|
||||||
|
:param cache bool: A Python grammar file, created with load_grammar.
|
||||||
|
You may not specify it. In that case it's the current Python version.
|
||||||
|
:param diff_cache bool: Diffs the cached python module against the new
|
||||||
|
code and tries to parse only the parts that have changed. Returns
|
||||||
|
the same (changed) module that is found in cache. Using this option
|
||||||
|
requires you to not do anything anymore with the old cached module,
|
||||||
|
because the contents of it might have changed.
|
||||||
|
:param cache_path bool: If given saves the parso cache in this
|
||||||
|
directory. If not given, defaults to the default cache places on
|
||||||
|
each platform.
|
||||||
|
|
||||||
|
:return: A syntax tree node. Typically the module.
|
||||||
|
"""
|
||||||
|
return self._parse(code=code, **kwargs)
|
||||||
|
|
||||||
|
def _parse(self, code=None, path=None, error_recovery=True,
|
||||||
|
start_symbol='file_input', cache=False, diff_cache=False,
|
||||||
|
cache_path=None):
|
||||||
|
"""
|
||||||
|
Wanted python3.5 * operator and keyword only arguments. Therefore just
|
||||||
|
wrap it all.
|
||||||
|
"""
|
||||||
|
if code is None and path is None:
|
||||||
|
raise TypeError("Please provide either code or a path.")
|
||||||
|
if error_recovery and start_symbol != 'file_input':
|
||||||
|
raise NotImplementedError("This is currently not implemented.")
|
||||||
|
|
||||||
|
if cache and code is None and path is not None:
|
||||||
|
# With the current architecture we cannot load from cache if the
|
||||||
|
# code is given, because we just load from cache if it's not older than
|
||||||
|
# the latest change (file last modified).
|
||||||
|
module_node = load_module(self, path, cache_path=cache_path)
|
||||||
|
if module_node is not None:
|
||||||
|
return module_node
|
||||||
|
|
||||||
|
if code is None:
|
||||||
|
with open(path, 'rb') as f:
|
||||||
|
code = source_to_unicode(f.read())
|
||||||
|
|
||||||
|
lines = tokenize_lines = splitlines(code, keepends=True)
|
||||||
|
if diff_cache:
|
||||||
|
try:
|
||||||
|
module_cache_item = parser_cache[path]
|
||||||
|
except KeyError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
module_node = module_cache_item.node
|
||||||
|
old_lines = module_cache_item.lines
|
||||||
|
if old_lines == lines:
|
||||||
|
# TODO remove this line? I think it's not needed. (dave)
|
||||||
|
save_module(self, path, module_node, lines, pickling=False,
|
||||||
|
cache_path=cache_path)
|
||||||
|
return module_node
|
||||||
|
|
||||||
|
new_node = DiffParser(self._pgen_grammar, module_node).update(
|
||||||
|
old_lines=old_lines,
|
||||||
|
new_lines=lines
|
||||||
|
)
|
||||||
|
save_module(self, path, new_node, lines, pickling=cache,
|
||||||
|
cache_path=cache_path)
|
||||||
|
return new_node
|
||||||
|
|
||||||
|
added_newline = not code.endswith('\n')
|
||||||
|
if added_newline:
|
||||||
|
code += '\n'
|
||||||
|
tokenize_lines = list(tokenize_lines)
|
||||||
|
tokenize_lines[-1] += '\n'
|
||||||
|
tokenize_lines.append('')
|
||||||
|
|
||||||
|
tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
|
||||||
|
|
||||||
|
p = Parser(self._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol)
|
||||||
|
root_node = p.parse(tokens=tokens)
|
||||||
|
if added_newline:
|
||||||
|
remove_last_newline(root_node)
|
||||||
|
|
||||||
|
if cache or diff_cache:
|
||||||
|
save_module(self, path, root_node, lines, pickling=cache,
|
||||||
|
cache_path=cache_path)
|
||||||
|
return root_node
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
labels = self._pgen_grammar.symbol2number.values()
|
||||||
|
txt = ' '.join(list(labels)[:3]) + ' ...'
|
||||||
|
return '<%s:%s>' % (self.__class__.__name__, txt)
|
||||||
|
|
||||||
|
|
||||||
|
def create_grammar(text, tokenizer=generate_tokens, parser=BaseParser):
|
||||||
|
"""
|
||||||
|
:param text: A BNF representation of your grammar.
|
||||||
|
"""
|
||||||
|
return Grammar(text, tokenizer=tokenizer, parser=parser)
|
||||||
|
|
||||||
|
|
||||||
|
def load_python_grammar(version=None):
|
||||||
|
"""
|
||||||
|
Loads a Python grammar. The default version is always the latest.
|
||||||
|
|
||||||
|
If you need support for a specific version, please use e.g.
|
||||||
|
`version='3.3'`.
|
||||||
|
"""
|
||||||
|
if version is None:
|
||||||
|
version = '3.6'
|
||||||
|
|
||||||
|
if version in ('3.2', '3.3'):
|
||||||
|
version = '3.4'
|
||||||
|
elif version == '2.6':
|
||||||
|
version = '2.7'
|
||||||
|
|
||||||
|
file = 'python/grammar' + version + '.txt'
|
||||||
|
|
||||||
|
global _loaded_grammars
|
||||||
|
path = os.path.join(os.path.dirname(__file__), file)
|
||||||
|
try:
|
||||||
|
return _loaded_grammars[path]
|
||||||
|
except KeyError:
|
||||||
|
try:
|
||||||
|
with open(path) as f:
|
||||||
|
bnf_text = f.read()
|
||||||
|
grammar = create_grammar(bnf_text, parser=PythonParser)
|
||||||
|
return _loaded_grammars.setdefault(path, grammar)
|
||||||
|
except FileNotFoundError:
|
||||||
|
# Just load the default if the file does not exist.
|
||||||
|
return load_python_grammar()
|
||||||
@@ -38,15 +38,15 @@ class BaseParser(object):
|
|||||||
}
|
}
|
||||||
default_leaf = tree.Leaf
|
default_leaf = tree.Leaf
|
||||||
|
|
||||||
def __init__(self, grammar, start_symbol='file_input', error_recovery=False):
|
def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False):
|
||||||
self._grammar = grammar
|
self._pgen_grammar = pgen_grammar
|
||||||
self._start_symbol = start_symbol
|
self._start_symbol = start_symbol
|
||||||
self._error_recovery = error_recovery
|
self._error_recovery = error_recovery
|
||||||
|
|
||||||
def parse(self, tokens):
|
def parse(self, tokens):
|
||||||
start_number = self._grammar.symbol2number[self._start_symbol]
|
start_number = self._pgen_grammar.symbol2number[self._start_symbol]
|
||||||
self.pgen_parser = PgenParser(
|
self.pgen_parser = PgenParser(
|
||||||
self._grammar, self.convert_node, self.convert_leaf,
|
self._pgen_grammar, self.convert_node, self.convert_leaf,
|
||||||
self.error_recovery, start_number
|
self.error_recovery, start_number
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -55,22 +55,22 @@ class BaseParser(object):
|
|||||||
del self.pgen_parser
|
del self.pgen_parser
|
||||||
return node
|
return node
|
||||||
|
|
||||||
def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
|
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||||
add_token_callback):
|
add_token_callback):
|
||||||
if self._error_recovery:
|
if self._error_recovery:
|
||||||
raise NotImplementedError("Error Recovery is not implemented")
|
raise NotImplementedError("Error Recovery is not implemented")
|
||||||
else:
|
else:
|
||||||
raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
|
raise ParserSyntaxError('SyntaxError: invalid syntax', start_pos)
|
||||||
|
|
||||||
def convert_node(self, grammar, type_, children):
|
def convert_node(self, pgen_grammar, type_, children):
|
||||||
# TODO REMOVE symbol, we don't want type here.
|
# TODO REMOVE symbol, we don't want type here.
|
||||||
symbol = grammar.number2symbol[type_]
|
symbol = pgen_grammar.number2symbol[type_]
|
||||||
try:
|
try:
|
||||||
return self.node_map[symbol](children)
|
return self.node_map[symbol](children)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return self.default_node(symbol, children)
|
return self.default_node(symbol, children)
|
||||||
|
|
||||||
def convert_leaf(self, grammar, type_, value, prefix, start_pos):
|
def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
|
||||||
try:
|
try:
|
||||||
return self.leaf_map[type_](value, start_pos, prefix)
|
return self.leaf_map[type_](value, start_pos, prefix)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
|
|||||||
@@ -17,8 +17,6 @@ fallback token code OP, but the parser needs the actual token code.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import pickle
|
import pickle
|
||||||
import hashlib
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Grammar(object):
|
class Grammar(object):
|
||||||
@@ -85,7 +83,6 @@ class Grammar(object):
|
|||||||
self.tokens = {}
|
self.tokens = {}
|
||||||
self.symbol2label = {}
|
self.symbol2label = {}
|
||||||
self.start = 256
|
self.start = 256
|
||||||
self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()
|
|
||||||
|
|
||||||
def dump(self, filename):
|
def dump(self, filename):
|
||||||
"""Dump the grammar tables to a pickle file."""
|
"""Dump the grammar tables to a pickle file."""
|
||||||
|
|||||||
@@ -1,144 +0,0 @@
|
|||||||
"""
|
|
||||||
Parsers for Python
|
|
||||||
"""
|
|
||||||
import os
|
|
||||||
|
|
||||||
from parso.utils import splitlines, source_to_unicode
|
|
||||||
from parso._compatibility import FileNotFoundError
|
|
||||||
from parso.pgen2.pgen import generate_grammar
|
|
||||||
from parso.python.parser import Parser, remove_last_newline
|
|
||||||
from parso.python.diff import DiffParser
|
|
||||||
from parso.tokenize import generate_tokens
|
|
||||||
from parso.cache import parser_cache, load_module, save_module
|
|
||||||
|
|
||||||
|
|
||||||
_loaded_grammars = {}
|
|
||||||
|
|
||||||
|
|
||||||
def load_grammar(version=None):
|
|
||||||
"""
|
|
||||||
Loads a Python grammar. The default version is always the latest.
|
|
||||||
|
|
||||||
If you need support for a specific version, please use e.g.
|
|
||||||
`version='3.3'`.
|
|
||||||
"""
|
|
||||||
if version is None:
|
|
||||||
version = '3.6'
|
|
||||||
|
|
||||||
if version in ('3.2', '3.3'):
|
|
||||||
version = '3.4'
|
|
||||||
elif version == '2.6':
|
|
||||||
version = '2.7'
|
|
||||||
|
|
||||||
file = 'grammar' + version + '.txt'
|
|
||||||
|
|
||||||
global _loaded_grammars
|
|
||||||
path = os.path.join(os.path.dirname(__file__), file)
|
|
||||||
try:
|
|
||||||
return _loaded_grammars[path]
|
|
||||||
except KeyError:
|
|
||||||
try:
|
|
||||||
with open(path) as f:
|
|
||||||
bnf_text = f.read()
|
|
||||||
grammar = generate_grammar(bnf_text)
|
|
||||||
return _loaded_grammars.setdefault(path, grammar)
|
|
||||||
except FileNotFoundError:
|
|
||||||
# Just load the default if the file does not exist.
|
|
||||||
return load_grammar()
|
|
||||||
|
|
||||||
|
|
||||||
def parse(code=None, **kwargs):
|
|
||||||
"""
|
|
||||||
If you want to parse a Python file you want to start here, most likely.
|
|
||||||
|
|
||||||
If you need finer grained control over the parsed instance, there will be
|
|
||||||
other ways to access it.
|
|
||||||
|
|
||||||
:param code: A unicode string that contains Python code.
|
|
||||||
:param path: The path to the file you want to open. Only needed for caching.
|
|
||||||
:param grammar: A Python grammar file, created with load_grammar. You may
|
|
||||||
not specify it. In that case it's the current Python version.
|
|
||||||
:param error_recovery: If enabled, any code will be returned. If it is
|
|
||||||
invalid, it will be returned as an error node. If disabled, you will
|
|
||||||
get a ParseError when encountering syntax errors in your code.
|
|
||||||
:param start_symbol: The grammar symbol that you want to parse. Only
|
|
||||||
allowed to be used when error_recovery is disabled.
|
|
||||||
:param cache_path: If given saves the parso cache in this directory. If not
|
|
||||||
given, defaults to the default cache places on each platform.
|
|
||||||
|
|
||||||
:return: A syntax tree node. Typically the module.
|
|
||||||
"""
|
|
||||||
# Wanted python3.5 * operator and keyword only arguments.
|
|
||||||
path = kwargs.pop('path', None)
|
|
||||||
grammar = kwargs.pop('grammar', None)
|
|
||||||
error_recovery = kwargs.pop('error_recovery', True)
|
|
||||||
start_symbol = kwargs.pop('start_symbol', 'file_input')
|
|
||||||
cache = kwargs.pop('cache', False)
|
|
||||||
diff_cache = kwargs.pop('diff_cache', False)
|
|
||||||
cache_path = kwargs.pop('cache_path', None)
|
|
||||||
|
|
||||||
if kwargs:
|
|
||||||
raise TypeError(
|
|
||||||
"parse() got an unexpected keyword argument '%s'"
|
|
||||||
% next(iter(kwargs)))
|
|
||||||
|
|
||||||
# Start with actual code.
|
|
||||||
if code is None and path is None:
|
|
||||||
raise TypeError("Please provide either code or a path.")
|
|
||||||
|
|
||||||
if grammar is None:
|
|
||||||
grammar = load_grammar()
|
|
||||||
|
|
||||||
if cache and code is None and path is not None:
|
|
||||||
# With the current architecture we cannot load from cache if the
|
|
||||||
# code is given, because we just load from cache if it's not older than
|
|
||||||
# the latest change (file last modified).
|
|
||||||
module_node = load_module(grammar, path, cache_path=cache_path)
|
|
||||||
if module_node is not None:
|
|
||||||
return module_node
|
|
||||||
|
|
||||||
if code is None:
|
|
||||||
with open(path, 'rb') as f:
|
|
||||||
code = source_to_unicode(f.read())
|
|
||||||
|
|
||||||
lines = tokenize_lines = splitlines(code, keepends=True)
|
|
||||||
if diff_cache:
|
|
||||||
try:
|
|
||||||
module_cache_item = parser_cache[path]
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
module_node = module_cache_item.node
|
|
||||||
old_lines = module_cache_item.lines
|
|
||||||
if old_lines == lines:
|
|
||||||
# TODO remove this line? I think it's not needed. (dave)
|
|
||||||
save_module(grammar, path, module_node, lines, pickling=False,
|
|
||||||
cache_path=cache_path)
|
|
||||||
return module_node
|
|
||||||
|
|
||||||
new_node = DiffParser(grammar, module_node).update(
|
|
||||||
old_lines=old_lines,
|
|
||||||
new_lines=lines
|
|
||||||
)
|
|
||||||
save_module(grammar, path, new_node, lines, pickling=cache,
|
|
||||||
cache_path=cache_path)
|
|
||||||
return new_node
|
|
||||||
|
|
||||||
added_newline = not code.endswith('\n')
|
|
||||||
if added_newline:
|
|
||||||
code += '\n'
|
|
||||||
tokenize_lines = list(tokenize_lines)
|
|
||||||
tokenize_lines[-1] += '\n'
|
|
||||||
tokenize_lines.append('')
|
|
||||||
|
|
||||||
tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
|
|
||||||
|
|
||||||
p = Parser(grammar, error_recovery=error_recovery, start_symbol=start_symbol)
|
|
||||||
root_node = p.parse(tokens=tokens)
|
|
||||||
if added_newline:
|
|
||||||
remove_last_newline(root_node)
|
|
||||||
|
|
||||||
if cache or diff_cache:
|
|
||||||
save_module(grammar, path, root_node, lines, pickling=cache,
|
|
||||||
cache_path=cache_path)
|
|
||||||
return root_node
|
|
||||||
|
|||||||
@@ -34,24 +34,24 @@ def _ends_with_newline(leaf, suffix=''):
|
|||||||
return typ == 'newline' or suffix.endswith('\n')
|
return typ == 'newline' or suffix.endswith('\n')
|
||||||
|
|
||||||
|
|
||||||
def _flows_finished(grammar, stack):
|
def _flows_finished(pgen_grammar, stack):
|
||||||
"""
|
"""
|
||||||
if, while, for and try might not be finished, because another part might
|
if, while, for and try might not be finished, because another part might
|
||||||
still be parsed.
|
still be parsed.
|
||||||
"""
|
"""
|
||||||
for dfa, newstate, (symbol_number, nodes) in stack:
|
for dfa, newstate, (symbol_number, nodes) in stack:
|
||||||
if grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
|
if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
|
||||||
'for_stmt', 'try_stmt'):
|
'for_stmt', 'try_stmt'):
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def suite_or_file_input_is_valid(grammar, stack):
|
def suite_or_file_input_is_valid(pgen_grammar, stack):
|
||||||
if not _flows_finished(grammar, stack):
|
if not _flows_finished(pgen_grammar, stack):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
for dfa, newstate, (symbol_number, nodes) in reversed(stack):
|
for dfa, newstate, (symbol_number, nodes) in reversed(stack):
|
||||||
if grammar.number2symbol[symbol_number] == 'suite':
|
if pgen_grammar.number2symbol[symbol_number] == 'suite':
|
||||||
# If only newline is in the suite, the suite is not valid, yet.
|
# If only newline is in the suite, the suite is not valid, yet.
|
||||||
return len(nodes) > 1
|
return len(nodes) > 1
|
||||||
# Not reaching a suite means that we're dealing with file_input levels
|
# Not reaching a suite means that we're dealing with file_input levels
|
||||||
@@ -89,8 +89,8 @@ class DiffParser(object):
|
|||||||
An advanced form of parsing a file faster. Unfortunately comes with huge
|
An advanced form of parsing a file faster. Unfortunately comes with huge
|
||||||
side effects. It changes the given module.
|
side effects. It changes the given module.
|
||||||
"""
|
"""
|
||||||
def __init__(self, grammar, module):
|
def __init__(self, pgen_grammar, module):
|
||||||
self._grammar = grammar
|
self._pgen_grammar = pgen_grammar
|
||||||
self._module = module
|
self._module = module
|
||||||
|
|
||||||
def _reset(self):
|
def _reset(self):
|
||||||
@@ -299,7 +299,7 @@ class DiffParser(object):
|
|||||||
line_offset=parsed_until_line
|
line_offset=parsed_until_line
|
||||||
)
|
)
|
||||||
self._active_parser = Parser(
|
self._active_parser = Parser(
|
||||||
self._grammar,
|
self._pgen_grammar,
|
||||||
error_recovery=True
|
error_recovery=True
|
||||||
)
|
)
|
||||||
return self._active_parser.parse(tokens=tokens)
|
return self._active_parser.parse(tokens=tokens)
|
||||||
@@ -338,7 +338,7 @@ class DiffParser(object):
|
|||||||
elif typ == NEWLINE and start_pos[0] >= until_line:
|
elif typ == NEWLINE and start_pos[0] >= until_line:
|
||||||
yield TokenInfo(typ, string, start_pos, prefix)
|
yield TokenInfo(typ, string, start_pos, prefix)
|
||||||
# Check if the parser is actually in a valid suite state.
|
# Check if the parser is actually in a valid suite state.
|
||||||
if suite_or_file_input_is_valid(self._grammar, stack):
|
if suite_or_file_input_is_valid(self._pgen_grammar, stack):
|
||||||
start_pos = start_pos[0] + 1, 0
|
start_pos = start_pos[0] + 1, 0
|
||||||
while len(indents) > int(omitted_first_indent):
|
while len(indents) > int(omitted_first_indent):
|
||||||
indents.pop()
|
indents.pop()
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ class Parser(BaseParser):
|
|||||||
This class is used to parse a Python file, it then divides them into a
|
This class is used to parse a Python file, it then divides them into a
|
||||||
class structure of different scopes.
|
class structure of different scopes.
|
||||||
|
|
||||||
:param grammar: The grammar object of pgen2. Loaded by load_grammar.
|
:param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
node_map = {
|
node_map = {
|
||||||
@@ -45,8 +45,8 @@ class Parser(BaseParser):
|
|||||||
}
|
}
|
||||||
default_node = tree.PythonNode
|
default_node = tree.PythonNode
|
||||||
|
|
||||||
def __init__(self, grammar, error_recovery=True, start_symbol='file_input'):
|
def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
|
||||||
super(Parser, self).__init__(grammar, start_symbol, error_recovery=error_recovery)
|
super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
|
||||||
|
|
||||||
self.syntax_errors = []
|
self.syntax_errors = []
|
||||||
self._omit_dedent_list = []
|
self._omit_dedent_list = []
|
||||||
@@ -75,14 +75,14 @@ class Parser(BaseParser):
|
|||||||
# If there's only one statement, we get back a non-module. That's
|
# If there's only one statement, we get back a non-module. That's
|
||||||
# not what we want, we want a module, so we add it here:
|
# not what we want, we want a module, so we add it here:
|
||||||
node = self.convert_node(
|
node = self.convert_node(
|
||||||
self._grammar,
|
self._pgen_grammar,
|
||||||
self._grammar.symbol2number['file_input'],
|
self._pgen_grammar.symbol2number['file_input'],
|
||||||
[node]
|
[node]
|
||||||
)
|
)
|
||||||
|
|
||||||
return node
|
return node
|
||||||
|
|
||||||
def convert_node(self, grammar, type, children):
|
def convert_node(self, pgen_grammar, type, children):
|
||||||
"""
|
"""
|
||||||
Convert raw node information to a PythonBaseNode instance.
|
Convert raw node information to a PythonBaseNode instance.
|
||||||
|
|
||||||
@@ -91,7 +91,7 @@ class Parser(BaseParser):
|
|||||||
strictly bottom-up.
|
strictly bottom-up.
|
||||||
"""
|
"""
|
||||||
# TODO REMOVE symbol, we don't want type here.
|
# TODO REMOVE symbol, we don't want type here.
|
||||||
symbol = grammar.number2symbol[type]
|
symbol = pgen_grammar.number2symbol[type]
|
||||||
try:
|
try:
|
||||||
return self.node_map[symbol](children)
|
return self.node_map[symbol](children)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
@@ -103,10 +103,10 @@ class Parser(BaseParser):
|
|||||||
children = [children[0]] + children[2:-1]
|
children = [children[0]] + children[2:-1]
|
||||||
return self.default_node(symbol, children)
|
return self.default_node(symbol, children)
|
||||||
|
|
||||||
def convert_leaf(self, grammar, type, value, prefix, start_pos):
|
def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
|
||||||
# print('leaf', repr(value), token.tok_name[type])
|
# print('leaf', repr(value), token.tok_name[type])
|
||||||
if type == tokenize.NAME:
|
if type == tokenize.NAME:
|
||||||
if value in grammar.keywords:
|
if value in pgen_grammar.keywords:
|
||||||
return tree.Keyword(value, start_pos, prefix)
|
return tree.Keyword(value, start_pos, prefix)
|
||||||
else:
|
else:
|
||||||
return tree.Name(value, start_pos, prefix)
|
return tree.Name(value, start_pos, prefix)
|
||||||
@@ -121,7 +121,7 @@ class Parser(BaseParser):
|
|||||||
else:
|
else:
|
||||||
return tree.Operator(value, start_pos, prefix)
|
return tree.Operator(value, start_pos, prefix)
|
||||||
|
|
||||||
def error_recovery(self, grammar, stack, arcs, typ, value, start_pos, prefix,
|
def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||||
add_token_callback):
|
add_token_callback):
|
||||||
"""
|
"""
|
||||||
This parser is written in a dynamic way, meaning that this parser
|
This parser is written in a dynamic way, meaning that this parser
|
||||||
@@ -130,7 +130,7 @@ class Parser(BaseParser):
|
|||||||
"""
|
"""
|
||||||
if not self._error_recovery:
|
if not self._error_recovery:
|
||||||
return super(Parser, self).error_recovery(
|
return super(Parser, self).error_recovery(
|
||||||
grammar, stack, arcs, typ, value, start_pos, prefix,
|
pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
|
||||||
add_token_callback)
|
add_token_callback)
|
||||||
|
|
||||||
def current_suite(stack):
|
def current_suite(stack):
|
||||||
@@ -138,7 +138,7 @@ class Parser(BaseParser):
|
|||||||
# file_input, if we detect an error.
|
# file_input, if we detect an error.
|
||||||
for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))):
|
for index, (dfa, state, (type_, nodes)) in reversed(list(enumerate(stack))):
|
||||||
# `suite` can sometimes be only simple_stmt, not stmt.
|
# `suite` can sometimes be only simple_stmt, not stmt.
|
||||||
symbol = grammar.number2symbol[type_]
|
symbol = pgen_grammar.number2symbol[type_]
|
||||||
if symbol == 'file_input':
|
if symbol == 'file_input':
|
||||||
break
|
break
|
||||||
elif symbol == 'suite' and len(nodes) > 1:
|
elif symbol == 'suite' and len(nodes) > 1:
|
||||||
@@ -149,7 +149,7 @@ class Parser(BaseParser):
|
|||||||
index, symbol, nodes = current_suite(stack)
|
index, symbol, nodes = current_suite(stack)
|
||||||
|
|
||||||
# print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
|
# print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
|
||||||
if self._stack_removal(grammar, stack, arcs, index + 1, value, start_pos):
|
if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
|
||||||
add_token_callback(typ, value, start_pos, prefix)
|
add_token_callback(typ, value, start_pos, prefix)
|
||||||
else:
|
else:
|
||||||
if typ == INDENT:
|
if typ == INDENT:
|
||||||
@@ -160,7 +160,7 @@ class Parser(BaseParser):
|
|||||||
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
|
error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
|
||||||
stack[-1][2][1].append(error_leaf)
|
stack[-1][2][1].append(error_leaf)
|
||||||
|
|
||||||
def _stack_removal(self, grammar, stack, arcs, start_index, value, start_pos):
|
def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos):
|
||||||
failed_stack = []
|
failed_stack = []
|
||||||
found = False
|
found = False
|
||||||
all_nodes = []
|
all_nodes = []
|
||||||
@@ -168,7 +168,7 @@ class Parser(BaseParser):
|
|||||||
if nodes:
|
if nodes:
|
||||||
found = True
|
found = True
|
||||||
if found:
|
if found:
|
||||||
symbol = grammar.number2symbol[typ]
|
symbol = pgen_grammar.number2symbol[typ]
|
||||||
failed_stack.append((symbol, nodes))
|
failed_stack.append((symbol, nodes))
|
||||||
all_nodes += nodes
|
all_nodes += nodes
|
||||||
if failed_stack:
|
if failed_stack:
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ the input given to the parser. This is important if you are using refactoring.
|
|||||||
The easiest way to play with this module is to use :class:`parsing.Parser`.
|
The easiest way to play with this module is to use :class:`parsing.Parser`.
|
||||||
:attr:`parsing.Parser.module` holds an instance of :class:`Module`:
|
:attr:`parsing.Parser.module` holds an instance of :class:`Module`:
|
||||||
|
|
||||||
>>> from parso.python import parse
|
>>> from parso import parse
|
||||||
>>> parser = parse('import os')
|
>>> parser = parse('import os')
|
||||||
>>> module = parser.get_root_node()
|
>>> module = parser.get_root_node()
|
||||||
>>> module
|
>>> module
|
||||||
|
|||||||
@@ -208,7 +208,7 @@ class Leaf(NodeOrLeaf):
|
|||||||
|
|
||||||
@utf8_repr
|
@utf8_repr
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<%s: %s start=%s>" % (type(self).__name__, self.value, self.start_pos)
|
return "<%s: %s>" % (type(self).__name__, self.value)
|
||||||
|
|
||||||
|
|
||||||
class BaseNode(NodeOrLeaf):
|
class BaseNode(NodeOrLeaf):
|
||||||
|
|||||||
10
setup.py
10
setup.py
@@ -1,16 +1,16 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement
|
||||||
from setuptools import setup
|
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
import parso
|
||||||
|
|
||||||
|
|
||||||
__AUTHOR__ = 'David Halter'
|
__AUTHOR__ = 'David Halter'
|
||||||
__AUTHOR_EMAIL__ = 'davidhalter88@gmail.com'
|
__AUTHOR_EMAIL__ = 'davidhalter88@gmail.com'
|
||||||
|
|
||||||
readme = open('README.rst').read() + '\n\n' + open('CHANGELOG.rst').read()
|
readme = open('README.rst').read() + '\n\n' + open('CHANGELOG.rst').read()
|
||||||
packages = ['parso', 'parso.pgen2', 'parso.python']
|
|
||||||
|
|
||||||
import parso
|
|
||||||
|
|
||||||
setup(name='parso',
|
setup(name='parso',
|
||||||
version=parso.__version__,
|
version=parso.__version__,
|
||||||
@@ -24,7 +24,7 @@ setup(name='parso',
|
|||||||
license='MIT',
|
license='MIT',
|
||||||
keywords='python parser parsing',
|
keywords='python parser parsing',
|
||||||
long_description=readme,
|
long_description=readme,
|
||||||
packages=packages,
|
packages=find_packages(exclude=['test']),
|
||||||
package_data={'parso': ['python/grammar*.txt']},
|
package_data={'parso': ['python/grammar*.txt']},
|
||||||
platforms=['any'],
|
platforms=['any'],
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
Tests ``from __future__ import absolute_import`` (only important for
|
Tests ``from __future__ import absolute_import`` (only important for
|
||||||
Python 2.X)
|
Python 2.X)
|
||||||
"""
|
"""
|
||||||
from parso.python import parse
|
from parso import parse
|
||||||
|
|
||||||
|
|
||||||
def test_explicit_absolute_imports():
|
def test_explicit_absolute_imports():
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import pytest
|
|||||||
|
|
||||||
from parso.cache import _NodeCacheItem, save_module, load_module, \
|
from parso.cache import _NodeCacheItem, save_module, load_module, \
|
||||||
_get_hashed_path, parser_cache, _load_from_file_system, _save_to_file_system
|
_get_hashed_path, parser_cache, _load_from_file_system, _save_to_file_system
|
||||||
from parso.python import load_grammar
|
from parso import load_python_grammar
|
||||||
from parso import cache
|
from parso import cache
|
||||||
|
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ def test_modulepickling_change_cache_dir(tmpdir):
|
|||||||
path_1 = 'fake path 1'
|
path_1 = 'fake path 1'
|
||||||
path_2 = 'fake path 2'
|
path_2 = 'fake path 2'
|
||||||
|
|
||||||
grammar = load_grammar()
|
grammar = load_python_grammar()
|
||||||
_save_to_file_system(grammar, path_1, item_1, cache_path=dir_1)
|
_save_to_file_system(grammar, path_1, item_1, cache_path=dir_1)
|
||||||
parser_cache.clear()
|
parser_cache.clear()
|
||||||
cached = load_stored_item(grammar, path_1, item_1, cache_path=dir_1)
|
cached = load_stored_item(grammar, path_1, item_1, cache_path=dir_1)
|
||||||
@@ -69,7 +69,7 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
|
|||||||
|
|
||||||
__ https://developer.apple.com/library/content/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html
|
__ https://developer.apple.com/library/content/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/FileSystemOverview/FileSystemOverview.html
|
||||||
"""
|
"""
|
||||||
grammar = load_grammar()
|
grammar = load_python_grammar()
|
||||||
module = 'fake parser'
|
module = 'fake parser'
|
||||||
|
|
||||||
# Create the file
|
# Create the file
|
||||||
|
|||||||
@@ -5,9 +5,18 @@ import pytest
|
|||||||
|
|
||||||
from parso.utils import splitlines
|
from parso.utils import splitlines
|
||||||
from parso import cache
|
from parso import cache
|
||||||
from parso.python import load_grammar
|
from parso import load_python_grammar
|
||||||
from parso.python.diff import DiffParser
|
from parso.python.diff import DiffParser
|
||||||
from parso.python import parse
|
from parso import parse
|
||||||
|
|
||||||
|
|
||||||
|
def test_simple():
|
||||||
|
"""
|
||||||
|
The diff parser reuses modules. So check for that.
|
||||||
|
"""
|
||||||
|
grammar = load_python_grammar()
|
||||||
|
module_a = grammar.parse('a', diff_cache=True)
|
||||||
|
assert grammar.parse('b', diff_cache=True) == module_a
|
||||||
|
|
||||||
|
|
||||||
def _check_error_leaves_nodes(node):
|
def _check_error_leaves_nodes(node):
|
||||||
@@ -40,7 +49,7 @@ def _assert_valid_graph(node):
|
|||||||
|
|
||||||
|
|
||||||
class Differ(object):
|
class Differ(object):
|
||||||
grammar = load_grammar()
|
grammar = load_python_grammar()
|
||||||
|
|
||||||
def initialize(self, code):
|
def initialize(self, code):
|
||||||
logging.debug('differ: initialize')
|
logging.debug('differ: initialize')
|
||||||
@@ -52,7 +61,7 @@ class Differ(object):
|
|||||||
def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
|
def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
|
||||||
logging.debug('differ: parse copies=%s parsers=%s', copies, parsers)
|
logging.debug('differ: parse copies=%s parsers=%s', copies, parsers)
|
||||||
lines = splitlines(code, keepends=True)
|
lines = splitlines(code, keepends=True)
|
||||||
diff_parser = DiffParser(self.grammar, self.module)
|
diff_parser = DiffParser(self.grammar._pgen_grammar, self.module)
|
||||||
new_module = diff_parser.update(self.lines, lines)
|
new_module = diff_parser.update(self.lines, lines)
|
||||||
self.lines = lines
|
self.lines = lines
|
||||||
assert code == new_module.get_code()
|
assert code == new_module.get_code()
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import difflib
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from parso.python import parse
|
from parso import parse
|
||||||
|
|
||||||
code_basic_features = '''
|
code_basic_features = '''
|
||||||
"""A mod docstring"""
|
"""A mod docstring"""
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ However the tests might still be relevant for the parser.
|
|||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
|
||||||
from parso._compatibility import u
|
from parso._compatibility import u
|
||||||
from parso.python import parse
|
from parso import parse
|
||||||
|
|
||||||
|
|
||||||
def test_carriage_return_splitting():
|
def test_carriage_return_splitting():
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ instead of simple parser objects.
|
|||||||
|
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
|
||||||
from parso.python import parse
|
from parso import parse
|
||||||
|
|
||||||
|
|
||||||
def assert_params(param_string, **wanted_dct):
|
def assert_params(param_string, **wanted_dct):
|
||||||
|
|||||||
@@ -5,7 +5,8 @@ from textwrap import dedent
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from parso._compatibility import u, py_version
|
from parso._compatibility import u, py_version
|
||||||
from parso.python import parse, load_grammar
|
from parso import parse
|
||||||
|
from parso import load_python_grammar
|
||||||
from parso.python import tree
|
from parso.python import tree
|
||||||
from parso.utils import splitlines
|
from parso.utils import splitlines
|
||||||
|
|
||||||
@@ -112,8 +113,8 @@ def test_param_splitting():
|
|||||||
"""
|
"""
|
||||||
def check(src, result):
|
def check(src, result):
|
||||||
# Python 2 tuple params should be ignored for now.
|
# Python 2 tuple params should be ignored for now.
|
||||||
grammar = load_grammar('%s.%s' % sys.version_info[:2])
|
grammar = load_python_grammar('%s.%s' % sys.version_info[:2])
|
||||||
m = parse(src, grammar=grammar)
|
m = grammar.parse(src)
|
||||||
if py_version >= 30:
|
if py_version >= 30:
|
||||||
assert not list(m.iter_funcdefs())
|
assert not list(m.iter_funcdefs())
|
||||||
else:
|
else:
|
||||||
@@ -160,10 +161,10 @@ def test_python3_octal():
|
|||||||
def test_load_newer_grammar():
|
def test_load_newer_grammar():
|
||||||
# This version shouldn't be out for a while, but if we somehow get this it
|
# This version shouldn't be out for a while, but if we somehow get this it
|
||||||
# should just take the latest Python grammar.
|
# should just take the latest Python grammar.
|
||||||
load_grammar('15.8')
|
load_python_grammar('15.8')
|
||||||
# The same is true for very old grammars (even though this is probably not
|
# The same is true for very old grammars (even though this is probably not
|
||||||
# going to be an issue.
|
# going to be an issue.
|
||||||
load_grammar('1.5')
|
load_python_grammar('1.5')
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('code', ['foo "', 'foo """\n', 'foo """\nbar'])
|
@pytest.mark.parametrize('code', ['foo "', 'foo """\n', 'foo """\nbar'])
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from textwrap import dedent
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from parso.python import parse
|
from parso import parse
|
||||||
from parso.python import tree
|
from parso.python import tree
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -9,15 +9,15 @@ test_grammar.py files from both Python 2 and Python 3.
|
|||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
|
||||||
from parso._compatibility import py_version
|
from parso._compatibility import py_version
|
||||||
from parso.python import parse as _parse, load_grammar
|
from parso import load_python_grammar
|
||||||
from parso import ParserSyntaxError
|
from parso import ParserSyntaxError
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
def parse(code, version='3.4'):
|
def parse(code, version='3.4'):
|
||||||
code = dedent(code) + "\n\n"
|
code = dedent(code) + "\n\n"
|
||||||
grammar = load_grammar(version=version)
|
grammar = load_python_grammar(version=version)
|
||||||
return _parse(code, grammar=grammar, error_recovery=False)
|
return grammar.parse(code, error_recovery=False)
|
||||||
|
|
||||||
|
|
||||||
def test_formfeed():
|
def test_formfeed():
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from parso._compatibility import py_version
|
|||||||
from parso.utils import splitlines
|
from parso.utils import splitlines
|
||||||
from parso.token import NAME, OP, NEWLINE, STRING, INDENT, ERRORTOKEN, ENDMARKER
|
from parso.token import NAME, OP, NEWLINE, STRING, INDENT, ERRORTOKEN, ENDMARKER
|
||||||
from parso import tokenize
|
from parso import tokenize
|
||||||
from parso.python import parse
|
from parso import parse
|
||||||
from parso.tokenize import TokenInfo
|
from parso.tokenize import TokenInfo
|
||||||
|
|
||||||
def _get_token_list(string):
|
def _get_token_list(string):
|
||||||
|
|||||||
Reference in New Issue
Block a user