Mirror of https://github.com/davidhalter/parso.git, synced 2025-12-07 21:34:32 +08:00
Move the parse function to the grammar.
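In short, this commit turns the module-level parse() into a method on Grammar. A minimal before/after sketch, assuming the names that appear in the test changes below (load_python_grammar and the parse wrapper in parso.python); the sample source string is illustrative:

from parso import load_python_grammar

src = "def f():\n    pass\n"
grammar = load_python_grammar()

# Old call style (removed in this commit): pass the grammar to a
# module-level parse() function.
#   from parso.python import parse
#   module = parse(src, grammar=grammar)

# New call style: parse() is a method of the Grammar object.
module = grammar.parse(src)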
@@ -1,6 +1,11 @@
 import hashlib
 
 from parso.pgen2.pgen import generate_grammar
+from parso.utils import splitlines, source_to_unicode
+from parso.python.parser import Parser, remove_last_newline
+from parso.python.diff import DiffParser
+from parso.tokenize import generate_tokens
+from parso.cache import parser_cache, load_module, save_module
 
 
 class Grammar(object):
@@ -11,6 +16,94 @@ class Grammar(object):
         self._diff_parser = diff_parser
         self._sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()
 
+    def parse(self, code=None, **kwargs):
+        """
+        If you want to parse a Python file you want to start here, most likely.
+
+        If you need finer grained control over the parsed instance, there will be
+        other ways to access it.
+
+        :param code: A unicode string that contains Python code.
+        :param path: The path to the file you want to open. Only needed for caching.
+        :param grammar: A Python grammar file, created with load_grammar. You may
+            not specify it. In that case it's the current Python version.
+        :param error_recovery: If enabled, any code will be returned. If it is
+            invalid, it will be returned as an error node. If disabled, you will
+            get a ParseError when encountering syntax errors in your code.
+        :param start_symbol: The grammar symbol that you want to parse. Only
+            allowed to be used when error_recovery is disabled.
+        :param cache_path: If given saves the parso cache in this directory. If not
+            given, defaults to the default cache places on each platform.
+
+        :return: A syntax tree node. Typically the module.
+        """
+        return self._parse(code=code, **kwargs)
+
+    def _parse(self, code=None, path=None, error_recovery=True,
+               start_symbol='file_input', cache=False, diff_cache=False,
+               cache_path=None):
+        """
+        Wanted python3.5 * operator and keyword only arguments. Therefore just
+        wrap it all.
+        """
+        if code is None and path is None:
+            raise TypeError("Please provide either code or a path.")
+
+        if cache and code is None and path is not None:
+            # With the current architecture we cannot load from cache if the
+            # code is given, because we just load from cache if it's not older than
+            # the latest change (file last modified).
+            module_node = load_module(self, path, cache_path=cache_path)
+            if module_node is not None:
+                return module_node
+
+        if code is None:
+            with open(path, 'rb') as f:
+                code = source_to_unicode(f.read())
+
+        lines = tokenize_lines = splitlines(code, keepends=True)
+        if diff_cache:
+            try:
+                module_cache_item = parser_cache[path]
+            except KeyError:
+                pass
+            else:
+                module_node = module_cache_item.node
+                old_lines = module_cache_item.lines
+                if old_lines == lines:
+                    # TODO remove this line? I think it's not needed. (dave)
+                    save_module(self, path, module_node, lines, pickling=False,
+                                cache_path=cache_path)
+                    return module_node
+
+                # TODO I think it's wrong that we have self here.
+                new_node = DiffParser(self, module_node).update(
+                    old_lines=old_lines,
+                    new_lines=lines
+                )
+                save_module(self, path, new_node, lines, pickling=cache,
+                            cache_path=cache_path)
+                return new_node
+
+        added_newline = not code.endswith('\n')
+        if added_newline:
+            code += '\n'
+            tokenize_lines = list(tokenize_lines)
+            tokenize_lines[-1] += '\n'
+            tokenize_lines.append('')
+
+        tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
+
+        p = Parser(self._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol)
+        root_node = p.parse(tokens=tokens)
+        if added_newline:
+            remove_last_newline(root_node)
+
+        if cache or diff_cache:
+            save_module(self, path, root_node, lines, pickling=cache,
+                        cache_path=cache_path)
+        return root_node
+
     def __repr__(self):
         labels = self._pgen_grammar.symbol2number.values()
         txt = ' '.join(list(labels)[:3]) + ' ...'
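A short usage sketch of the new Grammar.parse() API described by the docstring above. This is a sketch only: it assumes the load_python_grammar import used by the tests in this commit, and the file name is illustrative.

from parso import load_python_grammar

grammar = load_python_grammar()

# With error recovery on (the default), even broken code is returned as a
# tree containing error nodes instead of raising.
module = grammar.parse("def f(x):\n    return x\n")

# With error recovery off, syntax errors raise instead (the docstring calls
# the exception ParseError; the test module further down imports
# ParserSyntaxError from parso).
strict = grammar.parse("x = 1\n", error_recovery=False)

# code and path are alternatives; cache, diff_cache and cache_path are
# forwarded to _parse() via **kwargs.
with open("example.py", "w") as f:  # illustrative file
    f.write("x = 1\n")
module = grammar.parse(path="example.py", cache=True)

# A second parse of the same path with diff_cache=True consults parser_cache:
# identical lines return the cached tree, otherwise DiffParser updates it
# incrementally.
module = grammar.parse(path="example.py", diff_cache=True)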
@@ -1,98 +1,8 @@
 """
 Parsers for Python
 """
-from parso.utils import splitlines, source_to_unicode
-from parso.python.parser import Parser, remove_last_newline
-from parso.python.diff import DiffParser
-from parso.tokenize import generate_tokens
-from parso.cache import parser_cache, load_module, save_module
-
 
-def parse(code=None, **kwargs):
-    """
-    If you want to parse a Python file you want to start here, most likely.
-
-    If you need finer grained control over the parsed instance, there will be
-    other ways to access it.
-
-    :param code: A unicode string that contains Python code.
-    :param path: The path to the file you want to open. Only needed for caching.
-    :param grammar: A Python grammar file, created with load_grammar. You may
-        not specify it. In that case it's the current Python version.
-    :param error_recovery: If enabled, any code will be returned. If it is
-        invalid, it will be returned as an error node. If disabled, you will
-        get a ParseError when encountering syntax errors in your code.
-    :param start_symbol: The grammar symbol that you want to parse. Only
-        allowed to be used when error_recovery is disabled.
-    :param cache_path: If given saves the parso cache in this directory. If not
-        given, defaults to the default cache places on each platform.
-
-    :return: A syntax tree node. Typically the module.
-    """
-    # Wanted python3.5 * operator and keyword only arguments. Therefore just
-    # wrap it all.
-    def _parse(code=None, path=None, grammar=None, error_recovery=True,
-               start_symbol='file_input', cache=False, diff_cache=False,
-               cache_path=None):
-
-        if code is None and path is None:
-            raise TypeError("Please provide either code or a path.")
-
-        if grammar is None:
-            from parso import load_python_grammar
-            grammar = load_python_grammar()
-
-        if cache and code is None and path is not None:
-            # With the current architecture we cannot load from cache if the
-            # code is given, because we just load from cache if it's not older than
-            # the latest change (file last modified).
-            module_node = load_module(grammar, path, cache_path=cache_path)
-            if module_node is not None:
-                return module_node
-
-        if code is None:
-            with open(path, 'rb') as f:
-                code = source_to_unicode(f.read())
-
-        lines = tokenize_lines = splitlines(code, keepends=True)
-        if diff_cache:
-            try:
-                module_cache_item = parser_cache[path]
-            except KeyError:
-                pass
-            else:
-                module_node = module_cache_item.node
-                old_lines = module_cache_item.lines
-                if old_lines == lines:
-                    # TODO remove this line? I think it's not needed. (dave)
-                    save_module(grammar, path, module_node, lines, pickling=False,
-                                cache_path=cache_path)
-                    return module_node
-
-                new_node = DiffParser(grammar, module_node).update(
-                    old_lines=old_lines,
-                    new_lines=lines
-                )
-                save_module(grammar, path, new_node, lines, pickling=cache,
-                            cache_path=cache_path)
-                return new_node
-
-        added_newline = not code.endswith('\n')
-        if added_newline:
-            code += '\n'
-            tokenize_lines = list(tokenize_lines)
-            tokenize_lines[-1] += '\n'
-            tokenize_lines.append('')
-
-        tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
-
-        p = Parser(grammar._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol)
-        root_node = p.parse(tokens=tokens)
-        if added_newline:
-            remove_last_newline(root_node)
-
-        if cache or diff_cache:
-            save_module(grammar, path, root_node, lines, pickling=cache,
-                        cache_path=cache_path)
-        return root_node
-    return _parse(code=code, **kwargs)
+def parse(code, **kwargs):
+    from parso import load_python_grammar
+    grammar = load_python_grammar()
+    return grammar.parse(code, **kwargs)
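The module that used to host parse() keeps only a thin convenience wrapper, so callers that do not need an explicit grammar keep working. A minimal sketch, with the import path taken from the test import that is removed further down:

# The wrapper loads the default Python grammar and delegates to Grammar.parse().
from parso.python import parse

module = parse("import os\n")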
@@ -114,7 +114,7 @@ def test_param_splitting():
     def check(src, result):
         # Python 2 tuple params should be ignored for now.
         grammar = load_python_grammar('%s.%s' % sys.version_info[:2])
-        m = parse(src, grammar=grammar)
+        m = grammar.parse(src)
         if py_version >= 30:
             assert not list(m.iter_funcdefs())
         else:
@@ -10,7 +10,6 @@ from textwrap import dedent
 
 from parso._compatibility import py_version
 from parso import load_python_grammar
-from parso.python import parse as _parse
 from parso import ParserSyntaxError
 import pytest
 
@@ -18,7 +17,7 @@ import pytest
 def parse(code, version='3.4'):
     code = dedent(code) + "\n\n"
     grammar = load_python_grammar(version=version)
-    return _parse(code, grammar=grammar, error_recovery=False)
+    return grammar.parse(code, error_recovery=False)
 
 
 def test_formfeed():