Add NodeOrLeaf.dump() and NodeOrLeaf.search_ancestor() (#187)

- Add `NodeOrLeaf.dump()` to generate a readable and "round-trippable" dump for a parser tree
- `parso.tree.search_ancestor()` is deprecated; use `NodeOrLeaf.search_ancestor()` instead
- Set up children's parent in `BaseNode.__init__()`
- Add test for `search_ancestor`
- Various small type annotation improvements
Author: Saiyang Gou
Date: 2021-05-29 12:40:07 -07:00
Committed by: GitHub
Commit: 86f3f1096b (parent: f2b1ff9429)
11 changed files with 364 additions and 34 deletions
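A quick usage sketch of the new API (plain Python; the `search_ancestor` expectations mirror the new test in test/test_parser_tree.py below, and the `dump()` output format is shown in the docstring further down):

    import parso

    module = parso.parse("lambda x, y: x + y")
    print(module.dump())             # pretty-printed, round-trippable dump
    print(module.dump(indent=None))  # compact single-line form

    module = parso.parse("x + y")
    leaf = module.children[0].children[0]            # the Name 'x'
    assert leaf.search_ancestor('arith_expr') is module.children[0]
    assert leaf.search_ancestor('file_input') is module

    # Still available, but now deprecated:
    from parso.tree import search_ancestor
    assert search_ancestor(leaf, 'file_input') is module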


@@ -4,6 +4,8 @@ source = parso
 [report]
 # Regexes for lines to exclude from consideration
 exclude_lines =
+    pragma: no cover
     # Don't complain about missing debug-only code:
     def __repr__

.gitignore vendored

@@ -11,3 +11,4 @@ parso.egg-info/
 /.pytest_cache
 test/fuzz-redo.pickle
 /venv/
+/htmlcov/


@@ -23,7 +23,7 @@ within the statement. This lowers memory usage and cpu time and reduces the
 complexity of the ``Parser`` (there's another parser sitting inside
 ``Statement``, which produces ``Array`` and ``Call``).
 """
-from typing import Dict
+from typing import Dict, Type
 from parso import tree
 from parso.pgen2.generator import ReservedString
@@ -110,10 +110,10 @@ class BaseParser:
     When a syntax error occurs, error_recovery() is called.
     """
-    node_map: Dict[str, type] = {}
+    node_map: Dict[str, Type[tree.BaseNode]] = {}
     default_node = tree.Node
-    leaf_map: Dict[str, type] = {}
+    leaf_map: Dict[str, Type[tree.Leaf]] = {}
     default_leaf = tree.Leaf
     def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
@@ -156,8 +156,6 @@ class BaseParser:
             node = self.node_map[nonterminal](children)
         except KeyError:
             node = self.default_node(nonterminal, children)
-        for c in children:
-            c.parent = node
         return node
     def convert_leaf(self, type_, value, prefix, start_pos):


@@ -5,7 +5,6 @@ import re
 from contextlib import contextmanager
 from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule
-from parso.python.tree import search_ancestor
 from parso.python.tokenize import _get_token_collection
 _BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt')
@@ -231,7 +230,7 @@ def _any_fstring_error(version, node):
     elif node.type == "fstring":
         return True
     else:
-        return search_ancestor(node, "fstring")
+        return node.search_ancestor("fstring")
 class _Context:
@@ -1265,7 +1264,7 @@ class _NamedExprRule(_CheckAssignmentRule):
         def search_all_comp_ancestors(node):
             has_ancestors = False
             while True:
-                node = search_ancestor(node, 'testlist_comp', 'dictorsetmaker')
+                node = node.search_ancestor('testlist_comp', 'dictorsetmaker')
                 if node is None:
                     break
                 for child in node.children:


@@ -96,8 +96,6 @@ class Parser(BaseParser):
             # prefixes. Just ignore them.
             children = [children[0]] + children[2:-1]
         node = self.default_node(nonterminal, children)
-        for c in children:
-            c.parent = node
         return node
     def convert_leaf(self, type, value, prefix, start_pos):
@@ -185,8 +183,6 @@ class Parser(BaseParser):
         if all_nodes:
             node = tree.PythonErrorNode(all_nodes)
-            for n in all_nodes:
-                n.parent = node
             self.stack[start_index - 1].nodes.append(node)
         self.stack[start_index:] = []


@@ -4,7 +4,7 @@ from typing import Tuple
 from parso.python.errors import ErrorFinder, ErrorFinderConfig
 from parso.normalizer import Rule
-from parso.python.tree import search_ancestor, Flow, Scope
+from parso.python.tree import Flow, Scope
 _IMPORT_TYPES = ('import_name', 'import_from')
@@ -124,7 +124,7 @@ class BackslashNode(IndentationNode):
     type = IndentationTypes.BACKSLASH
     def __init__(self, config, parent_indentation, containing_leaf, spacing, parent=None):
-        expr_stmt = search_ancestor(containing_leaf, 'expr_stmt')
+        expr_stmt = containing_leaf.search_ancestor('expr_stmt')
         if expr_stmt is not None:
             equals = expr_stmt.children[-2]
@@ -724,11 +724,11 @@ class PEP8Normalizer(ErrorFinder):
     def add_issue(self, node, code, message):
         if self._previous_leaf is not None:
-            if search_ancestor(self._previous_leaf, 'error_node') is not None:
+            if self._previous_leaf.search_ancestor('error_node') is not None:
                 return
             if self._previous_leaf.type == 'error_leaf':
                 return
-        if search_ancestor(node, 'error_node') is not None:
+        if node.search_ancestor('error_node') is not None:
             return
         if code in (901, 903):
             # 901 and 903 are raised by the ErrorFinder.


@@ -40,6 +40,14 @@ class PrefixPart:
             self.start_pos
         )
+    def search_ancestor(self, *node_types):
+        node = self.parent
+        while node is not None:
+            if node.type in node_types:
+                return node
+            node = node.parent
+        return None
 _comment = r'#[^\n\r\f]*'
 _backslash = r'\\\r?\n'
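Prefix parts (comments, whitespace, newlines) are not part of the tree proper, so they get their own copy of the ancestor search here. A small sketch of what this enables; note that `_split_prefix()` is parso's internal helper for splitting a leaf's prefix and is not part of this diff:

    import parso

    module = parso.parse("# a comment\nx = 1\n")
    name_leaf = module.get_first_leaf()        # Name 'x'; the comment lives in its prefix
    comment = next(name_leaf._split_prefix())  # first PrefixPart, i.e. the comment
    assert comment.search_ancestor('expr_stmt') is not None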


@@ -49,8 +49,7 @@ except ImportError:
     from collections import Mapping
 from typing import Tuple
-from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
-    search_ancestor
+from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf
 from parso.python.prefix import split_prefix
 from parso.utils import split_lines
@@ -549,7 +548,11 @@ class Function(ClassOrFunc):
     def __init__(self, children):
         super().__init__(children)
         parameters = self.children[2]  # After `def foo`
-        parameters.children[1:-1] = _create_params(parameters, parameters.children[1:-1])
+        parameters_children = parameters.children[1:-1]
+        # If input parameters list already has Param objects, keep it as is;
+        # otherwise, convert it to a list of Param objects.
+        if not any(isinstance(child, Param) for child in parameters_children):
+            parameters.children[1:-1] = _create_params(parameters, parameters_children)
     def _get_param_nodes(self):
         return self.children[2].children
@@ -652,7 +655,11 @@ class Lambda(Function):
         # We don't want to call the Function constructor, call its parent.
         super(Function, self).__init__(children)
         # Everything between `lambda` and the `:` operator is a parameter.
-        self.children[1:-2] = _create_params(self, self.children[1:-2])
+        parameters_children = self.children[1:-2]
+        # If input children list already has Param objects, keep it as is;
+        # otherwise, convert it to a list of Param objects.
+        if not any(isinstance(child, Param) for child in parameters_children):
+            self.children[1:-2] = _create_params(self, parameters_children)
     @property
     def name(self):
@@ -776,7 +783,7 @@ class WithStmt(Flow):
         return names
     def get_test_node_from_name(self, name):
-        node = search_ancestor(name, "with_item")
+        node = name.search_ancestor("with_item")
         if node is None:
             raise ValueError('The name is not actually part of a with statement.')
         return node.children[0]
@@ -1080,11 +1087,9 @@ class Param(PythonBaseNode):
     """
     type = 'param'
-    def __init__(self, children, parent):
+    def __init__(self, children, parent=None):
         super().__init__(children)
         self.parent = parent
-        for child in children:
-            child.parent = self
     @property
     def star_count(self):
@@ -1171,7 +1176,7 @@ class Param(PythonBaseNode):
         """
        Returns the function/lambda of a parameter.
        """
-        return search_ancestor(self, 'funcdef', 'lambdef')
+        return self.search_ancestor('funcdef', 'lambdef')
     def get_code(self, include_prefix=True, include_comma=True):
         """


@@ -1,22 +1,25 @@
 from abc import abstractmethod, abstractproperty
-from typing import List, Optional, Tuple
+from typing import List, Optional, Tuple, Union
 from parso.utils import split_lines
-def search_ancestor(node, *node_types):
+def search_ancestor(node: 'NodeOrLeaf', *node_types: str) -> 'Optional[BaseNode]':
     """
     Recursively looks at the parents of a node and returns the first found node
-    that matches node_types. Returns ``None`` if no matching node is found.
+    that matches ``node_types``. Returns ``None`` if no matching node is found.
+
+    This function is deprecated, use :meth:`NodeOrLeaf.search_ancestor` instead.
     :param node: The ancestors of this node will be checked.
     :param node_types: type names that are searched for.
-    :type node_types: tuple of str
     """
-    while True:
-        node = node.parent
-        if node is None or node.type in node_types:
-            return node
+    n = node.parent
+    while n is not None:
+        if n.type in node_types:
+            return n
+        n = n.parent
+    return None
@@ -28,6 +31,11 @@ class NodeOrLeaf:
     '''
     The type is a string that typically matches the types of the grammar file.
     '''
+    parent: 'Optional[BaseNode]'
+    '''
+    The parent :class:`BaseNode` of this node or leaf.
+    None if this is the root node.
+    '''
     def get_root_node(self):
         """
@@ -173,6 +181,109 @@ class NodeOrLeaf:
         e.g. a statement.
         """
+    def search_ancestor(self, *node_types: str) -> 'Optional[BaseNode]':
+        """
+        Recursively looks at the parents of this node or leaf and returns the
+        first found node that matches ``node_types``. Returns ``None`` if no
+        matching node is found.
+
+        :param node_types: type names that are searched for.
+        """
+        node = self.parent
+        while node is not None:
+            if node.type in node_types:
+                return node
+            node = node.parent
+        return None
+
+    def dump(self, *, indent: Optional[Union[int, str]] = 4) -> str:
+        """
+        Returns a formatted dump of the parser tree rooted at this node or leaf. This is
+        mainly useful for debugging purposes.
+
+        The ``indent`` parameter is interpreted in a similar way as :py:func:`ast.dump`.
+        If ``indent`` is a non-negative integer or string, then the tree will be
+        pretty-printed with that indent level. An indent level of 0, negative, or ``""``
+        will only insert newlines. ``None`` selects the single line representation.
+        Using a positive integer indent indents that many spaces per level. If
+        ``indent`` is a string (such as ``"\\t"``), that string is used to indent each
+        level.
+
+        :param indent: Indentation style as described above. The default indentation is
+            4 spaces, which yields a pretty-printed dump.
+
+        >>> import parso
+        >>> print(parso.parse("lambda x, y: x + y").dump())
+        Module([
+            Lambda([
+                Keyword('lambda', (1, 0)),
+                Param([
+                    Name('x', (1, 7), prefix=' '),
+                    Operator(',', (1, 8)),
+                ]),
+                Param([
+                    Name('y', (1, 10), prefix=' '),
+                ]),
+                Operator(':', (1, 11)),
+                PythonNode('arith_expr', [
+                    Name('x', (1, 13), prefix=' '),
+                    Operator('+', (1, 15), prefix=' '),
+                    Name('y', (1, 17), prefix=' '),
+                ]),
+            ]),
+            EndMarker('', (1, 18)),
+        ])
+        """
+        if indent is None:
+            newline = False
+            indent_string = ''
+        elif isinstance(indent, int):
+            newline = True
+            indent_string = ' ' * indent
+        elif isinstance(indent, str):
+            newline = True
+            indent_string = indent
+        else:
+            raise TypeError(f"expect 'indent' to be int, str or None, got {indent!r}")
+
+        def _format_dump(node: NodeOrLeaf, indent: str = '', top_level: bool = True) -> str:
+            result = ''
+            node_type = type(node).__name__
+            if isinstance(node, Leaf):
+                result += f'{indent}{node_type}('
+                if isinstance(node, ErrorLeaf):
+                    result += f'{node.token_type!r}, '
+                elif isinstance(node, TypedLeaf):
+                    result += f'{node.type!r}, '
+                result += f'{node.value!r}, {node.start_pos!r}'
+                if node.prefix:
+                    result += f', prefix={node.prefix!r}'
+                result += ')'
+            elif isinstance(node, BaseNode):
+                result += f'{indent}{node_type}('
+                if isinstance(node, Node):
+                    result += f'{node.type!r}, '
+                result += '['
+                if newline:
+                    result += '\n'
+                for child in node.children:
+                    result += _format_dump(child, indent=indent + indent_string, top_level=False)
+                result += f'{indent}])'
+            else:  # pragma: no cover
+                # We shouldn't ever reach here, unless:
+                # - `NodeOrLeaf` is incorrectly subclassed elsewhere
+                # - or a node's children list contains invalid nodes or leaves
+                # Both are unexpected internal errors.
+                raise TypeError(f'unsupported node encountered: {node!r}')
+
+            if not top_level:
+                if newline:
+                    result += ',\n'
+                else:
+                    result += ', '
+            return result
+
+        return _format_dump(self)
@@ -180,6 +291,7 @@ class Leaf(NodeOrLeaf):
     were defined and what text preceeds them.
     '''
     __slots__ = ('value', 'parent', 'line', 'column', 'prefix')
+    prefix: str
     def __init__(self, value: str, start_pos: Tuple[int, int], prefix: str = '') -> None:
         self.value = value
@@ -266,9 +378,11 @@ class BaseNode(NodeOrLeaf):
         """
         self.parent: Optional[BaseNode] = None
         '''
-        The parent :class:`BaseNode` of this leaf.
+        The parent :class:`BaseNode` of this node.
         None if this is the root node.
         '''
+        for child in children:
+            child.parent = self
     @property
     def start_pos(self) -> Tuple[int, int]:
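The parent wiring in BaseNode.__init__() is what lets eval() of a dump (and any other hand-built tree) come back with working .parent pointers, without each caller having to set them. A minimal sketch using the public Node and TypedLeaf classes (the 'example' type name here is made up):

    from parso.tree import Node, TypedLeaf

    leaf = TypedLeaf('name', 'x', (1, 0))
    node = Node('example', [leaf])
    assert leaf.parent is node   # set by BaseNode.__init__()
    assert node.parent is None   # still the root until attached elsewhere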

test/test_dump_tree.py Normal file

@@ -0,0 +1,182 @@
from textwrap import dedent
import pytest
from parso import parse
# Using star import for easier eval testing below.
from parso.python.tree import * # noqa: F403
from parso.tree import * # noqa: F403
from parso.tree import ErrorLeaf, TypedLeaf
@pytest.mark.parametrize(
    'indent,expected_dump', [
        (None, "Module(["
            "Lambda(["
            "Keyword('lambda', (1, 0)), "
            "Param(["
            "Name('x', (1, 7), prefix=' '), "
            "Operator(',', (1, 8)), "
            "]), "
            "Param(["
            "Name('y', (1, 10), prefix=' '), "
            "]), "
            "Operator(':', (1, 11)), "
            "PythonNode('arith_expr', ["
            "Name('x', (1, 13), prefix=' '), "
            "Operator('+', (1, 15), prefix=' '), "
            "Name('y', (1, 17), prefix=' '), "
            "]), "
            "]), "
            "EndMarker('', (1, 18)), "
            "])"),
        (0, dedent('''\
            Module([
            Lambda([
            Keyword('lambda', (1, 0)),
            Param([
            Name('x', (1, 7), prefix=' '),
            Operator(',', (1, 8)),
            ]),
            Param([
            Name('y', (1, 10), prefix=' '),
            ]),
            Operator(':', (1, 11)),
            PythonNode('arith_expr', [
            Name('x', (1, 13), prefix=' '),
            Operator('+', (1, 15), prefix=' '),
            Name('y', (1, 17), prefix=' '),
            ]),
            ]),
            EndMarker('', (1, 18)),
            ])''')),
        (4, dedent('''\
            Module([
                Lambda([
                    Keyword('lambda', (1, 0)),
                    Param([
                        Name('x', (1, 7), prefix=' '),
                        Operator(',', (1, 8)),
                    ]),
                    Param([
                        Name('y', (1, 10), prefix=' '),
                    ]),
                    Operator(':', (1, 11)),
                    PythonNode('arith_expr', [
                        Name('x', (1, 13), prefix=' '),
                        Operator('+', (1, 15), prefix=' '),
                        Name('y', (1, 17), prefix=' '),
                    ]),
                ]),
                EndMarker('', (1, 18)),
            ])''')),
        ('\t', dedent('''\
            Module([
            \tLambda([
            \t\tKeyword('lambda', (1, 0)),
            \t\tParam([
            \t\t\tName('x', (1, 7), prefix=' '),
            \t\t\tOperator(',', (1, 8)),
            \t\t]),
            \t\tParam([
            \t\t\tName('y', (1, 10), prefix=' '),
            \t\t]),
            \t\tOperator(':', (1, 11)),
            \t\tPythonNode('arith_expr', [
            \t\t\tName('x', (1, 13), prefix=' '),
            \t\t\tOperator('+', (1, 15), prefix=' '),
            \t\t\tName('y', (1, 17), prefix=' '),
            \t\t]),
            \t]),
            \tEndMarker('', (1, 18)),
            ])''')),
    ]
)
def test_dump_parser_tree(indent, expected_dump):
    code = "lambda x, y: x + y"
    module = parse(code)
    assert module.dump(indent=indent) == expected_dump

    # Check that dumped tree can be eval'd to recover the parser tree and original code.
    recovered_code = eval(expected_dump).get_code()
    assert recovered_code == code
@pytest.mark.parametrize(
'node,expected_dump,expected_code', [
( # Dump intermediate node (not top level module)
parse("def foo(x, y): return x + y").children[0], dedent('''\
Function([
Keyword('def', (1, 0)),
Name('foo', (1, 4), prefix=' '),
PythonNode('parameters', [
Operator('(', (1, 7)),
Param([
Name('x', (1, 8)),
Operator(',', (1, 9)),
]),
Param([
Name('y', (1, 11), prefix=' '),
]),
Operator(')', (1, 12)),
]),
Operator(':', (1, 13)),
ReturnStmt([
Keyword('return', (1, 15), prefix=' '),
PythonNode('arith_expr', [
Name('x', (1, 22), prefix=' '),
Operator('+', (1, 24), prefix=' '),
Name('y', (1, 26), prefix=' '),
]),
]),
])'''),
"def foo(x, y): return x + y",
),
( # Dump leaf
parse("def foo(x, y): return x + y").children[0].children[0],
"Keyword('def', (1, 0))",
'def',
),
( # Dump ErrorLeaf
ErrorLeaf('error_type', 'error_code', (1, 1), prefix=' '),
"ErrorLeaf('error_type', 'error_code', (1, 1), prefix=' ')",
' error_code',
),
( # Dump TypedLeaf
TypedLeaf('type', 'value', (1, 1)),
"TypedLeaf('type', 'value', (1, 1))",
'value',
),
]
)
def test_dump_parser_tree_not_top_level_module(node, expected_dump, expected_code):
dump_result = node.dump()
assert dump_result == expected_dump
# Check that dumped tree can be eval'd to recover the parser tree and original code.
recovered_code = eval(dump_result).get_code()
assert recovered_code == expected_code
def test_dump_parser_tree_invalid_args():
module = parse("lambda x, y: x + y")
with pytest.raises(TypeError):
module.dump(indent=1.1)
def test_eval_dump_recovers_parent():
module = parse("lambda x, y: x + y")
module2 = eval(module.dump())
assert module2.parent is None
lambda_node = module2.children[0]
assert lambda_node.parent is module2
assert module2.children[1].parent is module2
assert lambda_node.children[0].parent is lambda_node
param_node = lambda_node.children[1]
assert param_node.parent is lambda_node
assert param_node.children[0].parent is param_node
assert param_node.children[1].parent is param_node
arith_expr_node = lambda_node.children[-1]
assert arith_expr_node.parent is lambda_node
assert arith_expr_node.children[0].parent is arith_expr_node


@@ -6,6 +6,7 @@ import pytest
 from parso import parse
 from parso.python import tree
+from parso.tree import search_ancestor


 class TestsFunctionAndLambdaParsing:
@@ -239,3 +240,27 @@ def test_with_stmt_get_test_node_from_name():
         for name in with_stmt.get_defined_names(include_setitem=True)
     ]
     assert tests == ["A", "B", "C", "D"]
+
+
+sample_module = parse('x + y')
+sample_node = sample_module.children[0]
+sample_leaf = sample_node.children[0]
+
+
+@pytest.mark.parametrize(
+    'node,node_types,expected_ancestor', [
+        (sample_module, ('file_input',), None),
+        (sample_node, ('arith_expr',), None),
+        (sample_node, ('file_input', 'eval_input'), sample_module),
+        (sample_leaf, ('name',), None),
+        (sample_leaf, ('arith_expr',), sample_node),
+        (sample_leaf, ('file_input',), sample_module),
+        (sample_leaf, ('file_input', 'arith_expr'), sample_node),
+        (sample_leaf, ('shift_expr',), None),
+        (sample_leaf, ('name', 'shift_expr',), None),
+        (sample_leaf, (), None),
+    ]
+)
+def test_search_ancestor(node, node_types, expected_ancestor):
+    assert node.search_ancestor(*node_types) is expected_ancestor
+    assert search_ancestor(node, *node_types) is expected_ancestor  # deprecated