diff --git a/parso/grammar.py b/parso/grammar.py index 927696c..71112f3 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -1,6 +1,11 @@ import hashlib from parso.pgen2.pgen import generate_grammar +from parso.utils import splitlines, source_to_unicode +from parso.python.parser import Parser, remove_last_newline +from parso.python.diff import DiffParser +from parso.tokenize import generate_tokens +from parso.cache import parser_cache, load_module, save_module class Grammar(object): @@ -11,6 +16,94 @@ class Grammar(object): self._diff_parser = diff_parser self._sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest() + def parse(self, code=None, **kwargs): + """ + If you want to parse a Python file you want to start here, most likely. + + If you need finer grained control over the parsed instance, there will be + other ways to access it. + + :param code: A unicode string that contains Python code. + :param path: The path to the file you want to open. Only needed for caching. + :param grammar: A Python grammar file, created with load_grammar. You may + not specify it. In that case it's the current Python version. + :param error_recovery: If enabled, any code will be returned. If it is + invalid, it will be returned as an error node. If disabled, you will + get a ParseError when encountering syntax errors in your code. + :param start_symbol: The grammar symbol that you want to parse. Only + allowed to be used when error_recovery is disabled. + :param cache_path: If given saves the parso cache in this directory. If not + given, defaults to the default cache places on each platform. + + :return: A syntax tree node. Typically the module. + """ + return self._parse(code=code, **kwargs) + + def _parse(self, code=None, path=None, error_recovery=True, + start_symbol='file_input', cache=False, diff_cache=False, + cache_path=None): + """ + Wanted python3.5 * operator and keyword only arguments. Therefore just + wrap it all. + """ + if code is None and path is None: + raise TypeError("Please provide either code or a path.") + + if cache and code is None and path is not None: + # With the current architecture we cannot load from cache if the + # code is given, because we just load from cache if it's not older than + # the latest change (file last modified). + module_node = load_module(self, path, cache_path=cache_path) + if module_node is not None: + return module_node + + if code is None: + with open(path, 'rb') as f: + code = source_to_unicode(f.read()) + + lines = tokenize_lines = splitlines(code, keepends=True) + if diff_cache: + try: + module_cache_item = parser_cache[path] + except KeyError: + pass + else: + module_node = module_cache_item.node + old_lines = module_cache_item.lines + if old_lines == lines: + # TODO remove this line? I think it's not needed. (dave) + save_module(self, path, module_node, lines, pickling=False, + cache_path=cache_path) + return module_node + + # TODO I think it's wrong that we have self here. + new_node = DiffParser(self, module_node).update( + old_lines=old_lines, + new_lines=lines + ) + save_module(self, path, new_node, lines, pickling=cache, + cache_path=cache_path) + return new_node + + added_newline = not code.endswith('\n') + if added_newline: + code += '\n' + tokenize_lines = list(tokenize_lines) + tokenize_lines[-1] += '\n' + tokenize_lines.append('') + + tokens = generate_tokens(tokenize_lines, use_exact_op_types=True) + + p = Parser(self._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol) + root_node = p.parse(tokens=tokens) + if added_newline: + remove_last_newline(root_node) + + if cache or diff_cache: + save_module(self, path, root_node, lines, pickling=cache, + cache_path=cache_path) + return root_node + def __repr__(self): labels = self._pgen_grammar.symbol2number.values() txt = ' '.join(list(labels)[:3]) + ' ...' diff --git a/parso/python/__init__.py b/parso/python/__init__.py index 4c3a7b6..baa31ab 100644 --- a/parso/python/__init__.py +++ b/parso/python/__init__.py @@ -1,98 +1,8 @@ """ Parsers for Python """ -from parso.utils import splitlines, source_to_unicode -from parso.python.parser import Parser, remove_last_newline -from parso.python.diff import DiffParser -from parso.tokenize import generate_tokens -from parso.cache import parser_cache, load_module, save_module - -def parse(code=None, **kwargs): - """ - If you want to parse a Python file you want to start here, most likely. - - If you need finer grained control over the parsed instance, there will be - other ways to access it. - - :param code: A unicode string that contains Python code. - :param path: The path to the file you want to open. Only needed for caching. - :param grammar: A Python grammar file, created with load_grammar. You may - not specify it. In that case it's the current Python version. - :param error_recovery: If enabled, any code will be returned. If it is - invalid, it will be returned as an error node. If disabled, you will - get a ParseError when encountering syntax errors in your code. - :param start_symbol: The grammar symbol that you want to parse. Only - allowed to be used when error_recovery is disabled. - :param cache_path: If given saves the parso cache in this directory. If not - given, defaults to the default cache places on each platform. - - :return: A syntax tree node. Typically the module. - """ - # Wanted python3.5 * operator and keyword only arguments. Therefore just - # wrap it all. - def _parse(code=None, path=None, grammar=None, error_recovery=True, - start_symbol='file_input', cache=False, diff_cache=False, - cache_path=None): - - if code is None and path is None: - raise TypeError("Please provide either code or a path.") - - if grammar is None: - from parso import load_python_grammar - grammar = load_python_grammar() - - if cache and code is None and path is not None: - # With the current architecture we cannot load from cache if the - # code is given, because we just load from cache if it's not older than - # the latest change (file last modified). - module_node = load_module(grammar, path, cache_path=cache_path) - if module_node is not None: - return module_node - - if code is None: - with open(path, 'rb') as f: - code = source_to_unicode(f.read()) - - lines = tokenize_lines = splitlines(code, keepends=True) - if diff_cache: - try: - module_cache_item = parser_cache[path] - except KeyError: - pass - else: - module_node = module_cache_item.node - old_lines = module_cache_item.lines - if old_lines == lines: - # TODO remove this line? I think it's not needed. (dave) - save_module(grammar, path, module_node, lines, pickling=False, - cache_path=cache_path) - return module_node - - new_node = DiffParser(grammar, module_node).update( - old_lines=old_lines, - new_lines=lines - ) - save_module(grammar, path, new_node, lines, pickling=cache, - cache_path=cache_path) - return new_node - - added_newline = not code.endswith('\n') - if added_newline: - code += '\n' - tokenize_lines = list(tokenize_lines) - tokenize_lines[-1] += '\n' - tokenize_lines.append('') - - tokens = generate_tokens(tokenize_lines, use_exact_op_types=True) - - p = Parser(grammar._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol) - root_node = p.parse(tokens=tokens) - if added_newline: - remove_last_newline(root_node) - - if cache or diff_cache: - save_module(grammar, path, root_node, lines, pickling=cache, - cache_path=cache_path) - return root_node - return _parse(code=code, **kwargs) +def parse(code, **kwargs): + from parso import load_python_grammar + grammar = load_python_grammar() + return grammar.parse(code, **kwargs) diff --git a/test/test_parser.py b/test/test_parser.py index 73556a6..fe6d462 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -114,7 +114,7 @@ def test_param_splitting(): def check(src, result): # Python 2 tuple params should be ignored for now. grammar = load_python_grammar('%s.%s' % sys.version_info[:2]) - m = parse(src, grammar=grammar) + m = grammar.parse(src) if py_version >= 30: assert not list(m.iter_funcdefs()) else: diff --git a/test/test_pgen2.py b/test/test_pgen2.py index 671c4fd..612e9da 100644 --- a/test/test_pgen2.py +++ b/test/test_pgen2.py @@ -10,7 +10,6 @@ from textwrap import dedent from parso._compatibility import py_version from parso import load_python_grammar -from parso.python import parse as _parse from parso import ParserSyntaxError import pytest @@ -18,7 +17,7 @@ import pytest def parse(code, version='3.4'): code = dedent(code) + "\n\n" grammar = load_python_grammar(version=version) - return _parse(code, grammar=grammar, error_recovery=False) + return grammar.parse(code, error_recovery=False) def test_formfeed():