From 86f3f1096b1126df8b8c914a54a4951a30e67eaa Mon Sep 17 00:00:00 2001 From: Saiyang Gou Date: Sat, 29 May 2021 12:40:07 -0700 Subject: [PATCH] Add `NodeOrLeaf.dump()` and `NodeOrLeaf.search_ancestor()` (#187) - Add `NodeOrLeaf.dump()` to generate a readable and "round-trippable" dump for a parser tree - `parso.tree.search_ancestor()` is deprecated, use `NodeOrLeaf.search_ancestor()` instead - Set up children's parent in `BaseNode.__init__()` - Add test for `search_ancestor` - Various small type annotations improvements --- .coveragerc | 2 + .gitignore | 1 + parso/parser.py | 8 +- parso/python/errors.py | 5 +- parso/python/parser.py | 4 - parso/python/pep8.py | 8 +- parso/python/prefix.py | 8 ++ parso/python/tree.py | 23 +++-- parso/tree.py | 132 ++++++++++++++++++++++++++-- test/test_dump_tree.py | 182 +++++++++++++++++++++++++++++++++++++++ test/test_parser_tree.py | 25 ++++++ 11 files changed, 364 insertions(+), 34 deletions(-) create mode 100644 test/test_dump_tree.py diff --git a/.coveragerc b/.coveragerc index c022851..06416fc 100644 --- a/.coveragerc +++ b/.coveragerc @@ -4,6 +4,8 @@ source = parso [report] # Regexes for lines to exclude from consideration exclude_lines = + pragma: no cover + # Don't complain about missing debug-only code: def __repr__ diff --git a/.gitignore b/.gitignore index 0c817be..2d869e2 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ parso.egg-info/ /.pytest_cache test/fuzz-redo.pickle /venv/ +/htmlcov/ diff --git a/parso/parser.py b/parso/parser.py index 3b25f35..3746643 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -23,7 +23,7 @@ within the statement. This lowers memory usage and cpu time and reduces the complexity of the ``Parser`` (there's another parser sitting inside ``Statement``, which produces ``Array`` and ``Call``). """ -from typing import Dict +from typing import Dict, Type from parso import tree from parso.pgen2.generator import ReservedString @@ -110,10 +110,10 @@ class BaseParser: When a syntax error occurs, error_recovery() is called. """ - node_map: Dict[str, type] = {} + node_map: Dict[str, Type[tree.BaseNode]] = {} default_node = tree.Node - leaf_map: Dict[str, type] = {} + leaf_map: Dict[str, Type[tree.Leaf]] = {} default_leaf = tree.Leaf def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False): @@ -156,8 +156,6 @@ class BaseParser: node = self.node_map[nonterminal](children) except KeyError: node = self.default_node(nonterminal, children) - for c in children: - c.parent = node return node def convert_leaf(self, type_, value, prefix, start_pos): diff --git a/parso/python/errors.py b/parso/python/errors.py index 5561860..5da046a 100644 --- a/parso/python/errors.py +++ b/parso/python/errors.py @@ -5,7 +5,6 @@ import re from contextlib import contextmanager from parso.normalizer import Normalizer, NormalizerConfig, Issue, Rule -from parso.python.tree import search_ancestor from parso.python.tokenize import _get_token_collection _BLOCK_STMTS = ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt', 'with_stmt') @@ -231,7 +230,7 @@ def _any_fstring_error(version, node): elif node.type == "fstring": return True else: - return search_ancestor(node, "fstring") + return node.search_ancestor("fstring") class _Context: @@ -1265,7 +1264,7 @@ class _NamedExprRule(_CheckAssignmentRule): def search_all_comp_ancestors(node): has_ancestors = False while True: - node = search_ancestor(node, 'testlist_comp', 'dictorsetmaker') + node = node.search_ancestor('testlist_comp', 'dictorsetmaker') if node is None: break for child in node.children: diff --git a/parso/python/parser.py b/parso/python/parser.py index da92d44..fa45e8b 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -96,8 +96,6 @@ class Parser(BaseParser): # prefixes. Just ignore them. children = [children[0]] + children[2:-1] node = self.default_node(nonterminal, children) - for c in children: - c.parent = node return node def convert_leaf(self, type, value, prefix, start_pos): @@ -185,8 +183,6 @@ class Parser(BaseParser): if all_nodes: node = tree.PythonErrorNode(all_nodes) - for n in all_nodes: - n.parent = node self.stack[start_index - 1].nodes.append(node) self.stack[start_index:] = [] diff --git a/parso/python/pep8.py b/parso/python/pep8.py index e821a45..b277d99 100644 --- a/parso/python/pep8.py +++ b/parso/python/pep8.py @@ -4,7 +4,7 @@ from typing import Tuple from parso.python.errors import ErrorFinder, ErrorFinderConfig from parso.normalizer import Rule -from parso.python.tree import search_ancestor, Flow, Scope +from parso.python.tree import Flow, Scope _IMPORT_TYPES = ('import_name', 'import_from') @@ -124,7 +124,7 @@ class BackslashNode(IndentationNode): type = IndentationTypes.BACKSLASH def __init__(self, config, parent_indentation, containing_leaf, spacing, parent=None): - expr_stmt = search_ancestor(containing_leaf, 'expr_stmt') + expr_stmt = containing_leaf.search_ancestor('expr_stmt') if expr_stmt is not None: equals = expr_stmt.children[-2] @@ -724,11 +724,11 @@ class PEP8Normalizer(ErrorFinder): def add_issue(self, node, code, message): if self._previous_leaf is not None: - if search_ancestor(self._previous_leaf, 'error_node') is not None: + if self._previous_leaf.search_ancestor('error_node') is not None: return if self._previous_leaf.type == 'error_leaf': return - if search_ancestor(node, 'error_node') is not None: + if node.search_ancestor('error_node') is not None: return if code in (901, 903): # 901 and 903 are raised by the ErrorFinder. diff --git a/parso/python/prefix.py b/parso/python/prefix.py index 38b750c..1e08b41 100644 --- a/parso/python/prefix.py +++ b/parso/python/prefix.py @@ -40,6 +40,14 @@ class PrefixPart: self.start_pos ) + def search_ancestor(self, *node_types): + node = self.parent + while node is not None: + if node.type in node_types: + return node + node = node.parent + return None + _comment = r'#[^\n\r\f]*' _backslash = r'\\\r?\n' diff --git a/parso/python/tree.py b/parso/python/tree.py index 6d8eb66..a2fc016 100644 --- a/parso/python/tree.py +++ b/parso/python/tree.py @@ -49,8 +49,7 @@ except ImportError: from collections import Mapping from typing import Tuple -from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \ - search_ancestor +from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf from parso.python.prefix import split_prefix from parso.utils import split_lines @@ -549,7 +548,11 @@ class Function(ClassOrFunc): def __init__(self, children): super().__init__(children) parameters = self.children[2] # After `def foo` - parameters.children[1:-1] = _create_params(parameters, parameters.children[1:-1]) + parameters_children = parameters.children[1:-1] + # If input parameters list already has Param objects, keep it as is; + # otherwise, convert it to a list of Param objects. + if not any(isinstance(child, Param) for child in parameters_children): + parameters.children[1:-1] = _create_params(parameters, parameters_children) def _get_param_nodes(self): return self.children[2].children @@ -652,7 +655,11 @@ class Lambda(Function): # We don't want to call the Function constructor, call its parent. super(Function, self).__init__(children) # Everything between `lambda` and the `:` operator is a parameter. - self.children[1:-2] = _create_params(self, self.children[1:-2]) + parameters_children = self.children[1:-2] + # If input children list already has Param objects, keep it as is; + # otherwise, convert it to a list of Param objects. + if not any(isinstance(child, Param) for child in parameters_children): + self.children[1:-2] = _create_params(self, parameters_children) @property def name(self): @@ -776,7 +783,7 @@ class WithStmt(Flow): return names def get_test_node_from_name(self, name): - node = search_ancestor(name, "with_item") + node = name.search_ancestor("with_item") if node is None: raise ValueError('The name is not actually part of a with statement.') return node.children[0] @@ -1080,11 +1087,9 @@ class Param(PythonBaseNode): """ type = 'param' - def __init__(self, children, parent): + def __init__(self, children, parent=None): super().__init__(children) self.parent = parent - for child in children: - child.parent = self @property def star_count(self): @@ -1171,7 +1176,7 @@ class Param(PythonBaseNode): """ Returns the function/lambda of a parameter. """ - return search_ancestor(self, 'funcdef', 'lambdef') + return self.search_ancestor('funcdef', 'lambdef') def get_code(self, include_prefix=True, include_comma=True): """ diff --git a/parso/tree.py b/parso/tree.py index 312c80b..a379a15 100644 --- a/parso/tree.py +++ b/parso/tree.py @@ -1,22 +1,25 @@ from abc import abstractmethod, abstractproperty -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Union from parso.utils import split_lines -def search_ancestor(node, *node_types): +def search_ancestor(node: 'NodeOrLeaf', *node_types: str) -> 'Optional[BaseNode]': """ Recursively looks at the parents of a node and returns the first found node - that matches node_types. Returns ``None`` if no matching node is found. + that matches ``node_types``. Returns ``None`` if no matching node is found. + + This function is deprecated, use :meth:`NodeOrLeaf.search_ancestor` instead. :param node: The ancestors of this node will be checked. :param node_types: type names that are searched for. - :type node_types: tuple of str """ - while True: - node = node.parent - if node is None or node.type in node_types: - return node + n = node.parent + while n is not None: + if n.type in node_types: + return n + n = n.parent + return None class NodeOrLeaf: @@ -28,6 +31,11 @@ class NodeOrLeaf: ''' The type is a string that typically matches the types of the grammar file. ''' + parent: 'Optional[BaseNode]' + ''' + The parent :class:`BaseNode` of this node or leaf. + None if this is the root node. + ''' def get_root_node(self): """ @@ -173,6 +181,109 @@ class NodeOrLeaf: e.g. a statement. """ + def search_ancestor(self, *node_types: str) -> 'Optional[BaseNode]': + """ + Recursively looks at the parents of this node or leaf and returns the + first found node that matches ``node_types``. Returns ``None`` if no + matching node is found. + + :param node_types: type names that are searched for. + """ + node = self.parent + while node is not None: + if node.type in node_types: + return node + node = node.parent + return None + + def dump(self, *, indent: Optional[Union[int, str]] = 4) -> str: + """ + Returns a formatted dump of the parser tree rooted at this node or leaf. This is + mainly useful for debugging purposes. + + The ``indent`` parameter is interpreted in a similar way as :py:func:`ast.dump`. + If ``indent`` is a non-negative integer or string, then the tree will be + pretty-printed with that indent level. An indent level of 0, negative, or ``""`` + will only insert newlines. ``None`` selects the single line representation. + Using a positive integer indent indents that many spaces per level. If + ``indent`` is a string (such as ``"\\t"``), that string is used to indent each + level. + + :param indent: Indentation style as described above. The default indentation is + 4 spaces, which yields a pretty-printed dump. + + >>> import parso + >>> print(parso.parse("lambda x, y: x + y").dump()) + Module([ + Lambda([ + Keyword('lambda', (1, 0)), + Param([ + Name('x', (1, 7), prefix=' '), + Operator(',', (1, 8)), + ]), + Param([ + Name('y', (1, 10), prefix=' '), + ]), + Operator(':', (1, 11)), + PythonNode('arith_expr', [ + Name('x', (1, 13), prefix=' '), + Operator('+', (1, 15), prefix=' '), + Name('y', (1, 17), prefix=' '), + ]), + ]), + EndMarker('', (1, 18)), + ]) + """ + if indent is None: + newline = False + indent_string = '' + elif isinstance(indent, int): + newline = True + indent_string = ' ' * indent + elif isinstance(indent, str): + newline = True + indent_string = indent + else: + raise TypeError(f"expect 'indent' to be int, str or None, got {indent!r}") + + def _format_dump(node: NodeOrLeaf, indent: str = '', top_level: bool = True) -> str: + result = '' + node_type = type(node).__name__ + if isinstance(node, Leaf): + result += f'{indent}{node_type}(' + if isinstance(node, ErrorLeaf): + result += f'{node.token_type!r}, ' + elif isinstance(node, TypedLeaf): + result += f'{node.type!r}, ' + result += f'{node.value!r}, {node.start_pos!r}' + if node.prefix: + result += f', prefix={node.prefix!r}' + result += ')' + elif isinstance(node, BaseNode): + result += f'{indent}{node_type}(' + if isinstance(node, Node): + result += f'{node.type!r}, ' + result += '[' + if newline: + result += '\n' + for child in node.children: + result += _format_dump(child, indent=indent + indent_string, top_level=False) + result += f'{indent}])' + else: # pragma: no cover + # We shouldn't ever reach here, unless: + # - `NodeOrLeaf` is incorrectly subclassed else where + # - or a node's children list contains invalid nodes or leafs + # Both are unexpected internal errors. + raise TypeError(f'unsupported node encountered: {node!r}') + if not top_level: + if newline: + result += ',\n' + else: + result += ', ' + return result + + return _format_dump(self) + class Leaf(NodeOrLeaf): ''' @@ -180,6 +291,7 @@ class Leaf(NodeOrLeaf): were defined and what text preceeds them. ''' __slots__ = ('value', 'parent', 'line', 'column', 'prefix') + prefix: str def __init__(self, value: str, start_pos: Tuple[int, int], prefix: str = '') -> None: self.value = value @@ -266,9 +378,11 @@ class BaseNode(NodeOrLeaf): """ self.parent: Optional[BaseNode] = None ''' - The parent :class:`BaseNode` of this leaf. + The parent :class:`BaseNode` of this node. None if this is the root node. ''' + for child in children: + child.parent = self @property def start_pos(self) -> Tuple[int, int]: diff --git a/test/test_dump_tree.py b/test/test_dump_tree.py new file mode 100644 index 0000000..d2d7259 --- /dev/null +++ b/test/test_dump_tree.py @@ -0,0 +1,182 @@ +from textwrap import dedent + +import pytest + +from parso import parse +# Using star import for easier eval testing below. +from parso.python.tree import * # noqa: F403 +from parso.tree import * # noqa: F403 +from parso.tree import ErrorLeaf, TypedLeaf + + +@pytest.mark.parametrize( + 'indent,expected_dump', [ + (None, "Module([" + "Lambda([" + "Keyword('lambda', (1, 0)), " + "Param([" + "Name('x', (1, 7), prefix=' '), " + "Operator(',', (1, 8)), " + "]), " + "Param([" + "Name('y', (1, 10), prefix=' '), " + "]), " + "Operator(':', (1, 11)), " + "PythonNode('arith_expr', [" + "Name('x', (1, 13), prefix=' '), " + "Operator('+', (1, 15), prefix=' '), " + "Name('y', (1, 17), prefix=' '), " + "]), " + "]), " + "EndMarker('', (1, 18)), " + "])"), + (0, dedent('''\ + Module([ + Lambda([ + Keyword('lambda', (1, 0)), + Param([ + Name('x', (1, 7), prefix=' '), + Operator(',', (1, 8)), + ]), + Param([ + Name('y', (1, 10), prefix=' '), + ]), + Operator(':', (1, 11)), + PythonNode('arith_expr', [ + Name('x', (1, 13), prefix=' '), + Operator('+', (1, 15), prefix=' '), + Name('y', (1, 17), prefix=' '), + ]), + ]), + EndMarker('', (1, 18)), + ])''')), + (4, dedent('''\ + Module([ + Lambda([ + Keyword('lambda', (1, 0)), + Param([ + Name('x', (1, 7), prefix=' '), + Operator(',', (1, 8)), + ]), + Param([ + Name('y', (1, 10), prefix=' '), + ]), + Operator(':', (1, 11)), + PythonNode('arith_expr', [ + Name('x', (1, 13), prefix=' '), + Operator('+', (1, 15), prefix=' '), + Name('y', (1, 17), prefix=' '), + ]), + ]), + EndMarker('', (1, 18)), + ])''')), + ('\t', dedent('''\ + Module([ + \tLambda([ + \t\tKeyword('lambda', (1, 0)), + \t\tParam([ + \t\t\tName('x', (1, 7), prefix=' '), + \t\t\tOperator(',', (1, 8)), + \t\t]), + \t\tParam([ + \t\t\tName('y', (1, 10), prefix=' '), + \t\t]), + \t\tOperator(':', (1, 11)), + \t\tPythonNode('arith_expr', [ + \t\t\tName('x', (1, 13), prefix=' '), + \t\t\tOperator('+', (1, 15), prefix=' '), + \t\t\tName('y', (1, 17), prefix=' '), + \t\t]), + \t]), + \tEndMarker('', (1, 18)), + ])''')), + ] +) +def test_dump_parser_tree(indent, expected_dump): + code = "lambda x, y: x + y" + module = parse(code) + assert module.dump(indent=indent) == expected_dump + + # Check that dumped tree can be eval'd to recover the parser tree and original code. + recovered_code = eval(expected_dump).get_code() + assert recovered_code == code + + +@pytest.mark.parametrize( + 'node,expected_dump,expected_code', [ + ( # Dump intermediate node (not top level module) + parse("def foo(x, y): return x + y").children[0], dedent('''\ + Function([ + Keyword('def', (1, 0)), + Name('foo', (1, 4), prefix=' '), + PythonNode('parameters', [ + Operator('(', (1, 7)), + Param([ + Name('x', (1, 8)), + Operator(',', (1, 9)), + ]), + Param([ + Name('y', (1, 11), prefix=' '), + ]), + Operator(')', (1, 12)), + ]), + Operator(':', (1, 13)), + ReturnStmt([ + Keyword('return', (1, 15), prefix=' '), + PythonNode('arith_expr', [ + Name('x', (1, 22), prefix=' '), + Operator('+', (1, 24), prefix=' '), + Name('y', (1, 26), prefix=' '), + ]), + ]), + ])'''), + "def foo(x, y): return x + y", + ), + ( # Dump leaf + parse("def foo(x, y): return x + y").children[0].children[0], + "Keyword('def', (1, 0))", + 'def', + ), + ( # Dump ErrorLeaf + ErrorLeaf('error_type', 'error_code', (1, 1), prefix=' '), + "ErrorLeaf('error_type', 'error_code', (1, 1), prefix=' ')", + ' error_code', + ), + ( # Dump TypedLeaf + TypedLeaf('type', 'value', (1, 1)), + "TypedLeaf('type', 'value', (1, 1))", + 'value', + ), + ] +) +def test_dump_parser_tree_not_top_level_module(node, expected_dump, expected_code): + dump_result = node.dump() + assert dump_result == expected_dump + + # Check that dumped tree can be eval'd to recover the parser tree and original code. + recovered_code = eval(dump_result).get_code() + assert recovered_code == expected_code + + +def test_dump_parser_tree_invalid_args(): + module = parse("lambda x, y: x + y") + + with pytest.raises(TypeError): + module.dump(indent=1.1) + + +def test_eval_dump_recovers_parent(): + module = parse("lambda x, y: x + y") + module2 = eval(module.dump()) + assert module2.parent is None + lambda_node = module2.children[0] + assert lambda_node.parent is module2 + assert module2.children[1].parent is module2 + assert lambda_node.children[0].parent is lambda_node + param_node = lambda_node.children[1] + assert param_node.parent is lambda_node + assert param_node.children[0].parent is param_node + assert param_node.children[1].parent is param_node + arith_expr_node = lambda_node.children[-1] + assert arith_expr_node.parent is lambda_node + assert arith_expr_node.children[0].parent is arith_expr_node diff --git a/test/test_parser_tree.py b/test/test_parser_tree.py index 9a4a2e3..b994b9b 100644 --- a/test/test_parser_tree.py +++ b/test/test_parser_tree.py @@ -6,6 +6,7 @@ import pytest from parso import parse from parso.python import tree +from parso.tree import search_ancestor class TestsFunctionAndLambdaParsing: @@ -239,3 +240,27 @@ def test_with_stmt_get_test_node_from_name(): for name in with_stmt.get_defined_names(include_setitem=True) ] assert tests == ["A", "B", "C", "D"] + + +sample_module = parse('x + y') +sample_node = sample_module.children[0] +sample_leaf = sample_node.children[0] + + +@pytest.mark.parametrize( + 'node,node_types,expected_ancestor', [ + (sample_module, ('file_input',), None), + (sample_node, ('arith_expr',), None), + (sample_node, ('file_input', 'eval_input'), sample_module), + (sample_leaf, ('name',), None), + (sample_leaf, ('arith_expr',), sample_node), + (sample_leaf, ('file_input',), sample_module), + (sample_leaf, ('file_input', 'arith_expr'), sample_node), + (sample_leaf, ('shift_expr',), None), + (sample_leaf, ('name', 'shift_expr',), None), + (sample_leaf, (), None), + ] +) +def test_search_ancestor(node, node_types, expected_ancestor): + assert node.search_ancestor(*node_types) is expected_ancestor + assert search_ancestor(node, *node_types) is expected_ancestor # deprecated