1
0
forked from VimPlug/jedi

Refactored the parser calls. Now it's possible to use jedi.parser.python.parse to quickly parse something.

This commit is contained in:
Dave Halter
2017-03-14 00:38:58 +01:00
parent 9b5e6d16da
commit 97fc3bc23c
23 changed files with 126 additions and 123 deletions

View File

@@ -13,7 +13,7 @@ import os
import warnings
import sys
from jedi.parser import load_grammar
from jedi.parser.python import load_grammar
from jedi.parser import tree
from jedi.parser.diff import FastParser
from jedi.parser.utils import save_parser

View File

@@ -9,7 +9,8 @@ import inspect
import types
from jedi._compatibility import is_py3, builtins, unicode, is_py34
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser import ParserWithRecovery
from jedi.parser.python import load_grammar
from jedi.parser import tree as pt
modules = {}

View File

@@ -22,7 +22,8 @@ from jedi._compatibility import u
from jedi.common import unite
from jedi.evaluate import context
from jedi.evaluate.cache import memoize_default
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser import ParserWithRecovery
from jedi.parser.python import load_grammar
from jedi.parser.tree import search_ancestor
from jedi.common import indent_block
from jedi.evaluate.iterable import SequenceLiteralContext, FakeSequence

View File

@@ -22,8 +22,8 @@ x support for type hint comments for functions, `# type: (int, str) -> int`.
import itertools
import os
from jedi.parser import \
Parser, load_grammar, ParseError, ParserWithRecovery, tree
from jedi.parser import Parser, ParseError, ParserWithRecovery, tree
from jedi.parser.python import load_grammar
from jedi.common import unite
from jedi.evaluate.cache import memoize_default
from jedi.evaluate import compiled

View File

@@ -1,2 +1,2 @@
from jedi.parser.parser import Parser, ParserWithRecovery, ParseError
from jedi.parser.python import load_grammar
from jedi.parser.pgen2.pgen import generate_grammar

View File

@@ -68,7 +68,7 @@ class Parser(object):
}
def __init__(self, grammar, source, start_symbol='file_input',
tokenizer=None, start_parsing=True):
tokens=None, start_parsing=True):
# Todo Remove start_parsing (with False)
self._used_names = {}
@@ -86,11 +86,11 @@ class Parser(object):
self._parsed = None
if start_parsing:
if tokenizer is None:
tokenizer = tokenize.source_tokens(source, use_exact_op_types=True)
self.parse(tokenizer)
if tokens is None:
tokens = tokenize.source_tokens(source, use_exact_op_types=True)
self.parse(tokens)
def parse(self, tokenizer):
def parse(self, tokens):
if self._parsed is not None:
return self._parsed
@@ -100,7 +100,7 @@ class Parser(object):
self.error_recovery, start_number
)
self._parsed = self.pgen_parser.parse(tokenizer)
self._parsed = self.pgen_parser.parse(tokens)
if self._start_symbol == 'file_input' != self._parsed.type:
# If there's only one statement, we get back a non-module. That's
@@ -205,7 +205,7 @@ class ParserWithRecovery(Parser):
:param module_path: The path of the module in the file system, may be None.
:type module_path: str
"""
def __init__(self, grammar, source, module_path=None, tokenizer=None,
def __init__(self, grammar, source, module_path=None, tokens=None,
start_parsing=True):
self.syntax_errors = []
@@ -224,7 +224,7 @@ class ParserWithRecovery(Parser):
# else:
super(ParserWithRecovery, self).__init__(
grammar, source,
tokenizer=tokenizer,
tokens=tokens,
start_parsing=start_parsing
)

View File

@@ -118,8 +118,8 @@ class PgenParser(object):
self.rootnode = None
self.error_recovery = error_recovery
def parse(self, tokenizer):
for type_, value, start_pos, prefix in tokenizer:
def parse(self, tokens):
for type_, value, start_pos, prefix in tokens:
if self.addtoken(type_, value, start_pos, prefix):
break
else:

View File

@@ -41,13 +41,20 @@ def load_grammar(version=None):
return load_grammar()
def parse(source, grammar=None, error_recovery=False):
def parse(code, grammar=None, error_recovery=True):
added_newline = not code.endswith('\n')
if added_newline:
code += '\n'
if grammar is None:
grammar = load_grammar()
tokens = source_tokens(source)
tokens = source_tokens(code, use_exact_op_types=True)
if error_recovery:
parser = ParserWithRecovery
else:
parser = Parser
return parser(grammar, tokens).get_root_node()
p = parser(grammar, code, tokens=tokens)
if added_newline:
p.remove_last_newline()
return p.get_root_node()

View File

@@ -14,7 +14,8 @@ The easiest way to play with this module is to use :class:`parsing.Parser`.
:attr:`parsing.Parser.module` holds an instance of :class:`Module`:
>>> from jedi._compatibility import u
>>> from jedi.parser import ParserWithRecovery, load_grammar
>>> from jedi.parser.python import load_grammar
>>> from jedi.parser import ParserWithRecovery
>>> parser = ParserWithRecovery(load_grammar(), u('import os'), 'example.py')
>>> submodule = parser.module
>>> submodule

View File

@@ -120,7 +120,8 @@ from functools import reduce
import jedi
from jedi import debug
from jedi._compatibility import unicode, is_py3
from jedi.parser import Parser, load_grammar
from jedi.parser import Parser
from jedi.parser.python import load_grammar
from jedi.api.classes import Definition
from jedi.api.completion import get_user_scope

View File

@@ -3,8 +3,7 @@ Tests ``from __future__ import absolute_import`` (only important for
Python 2.X)
"""
import jedi
from jedi._compatibility import u
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser.python import parse
from .. import helpers
@@ -12,16 +11,15 @@ def test_explicit_absolute_imports():
"""
Detect modules with ``from __future__ import absolute_import``.
"""
parser = ParserWithRecovery(load_grammar(), u("from __future__ import absolute_import"), "test.py")
assert parser.module.has_explicit_absolute_import
module = parse("from __future__ import absolute_import")
assert module.has_explicit_absolute_import
def test_no_explicit_absolute_imports():
"""
Detect modules without ``from __future__ import absolute_import``.
"""
parser = ParserWithRecovery(load_grammar(), u("1"), "test.py")
assert not parser.module.has_explicit_absolute_import
assert not parse("1").has_explicit_absolute_import
def test_dont_break_imports_without_namespaces():
@@ -29,9 +27,8 @@ def test_dont_break_imports_without_namespaces():
The code checking for ``from __future__ import absolute_import`` shouldn't
assume that all imports have non-``None`` namespaces.
"""
src = u("from __future__ import absolute_import\nimport xyzzy")
parser = ParserWithRecovery(load_grammar(), src, "test.py")
assert parser.module.has_explicit_absolute_import
src = "from __future__ import absolute_import\nimport xyzzy"
assert parse(src).has_explicit_absolute_import
@helpers.cwd_at("test/test_evaluate/absolute_import")

View File

@@ -8,15 +8,15 @@ from jedi.evaluate.sys_path import (_get_parent_dir_with_file,
_check_module)
from jedi.evaluate import Evaluator
from jedi.evaluate.representation import ModuleContext
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser.python import parse, load_grammar
from jedi.parser.parser import ParserWithRecovery
from ..helpers import cwd_at
def check_module_test(code):
grammar = load_grammar()
p = ParserWithRecovery(grammar, code)
module_context = ModuleContext(Evaluator(grammar), p.module)
module_context = ModuleContext(Evaluator(grammar), parse(code))
return _check_module(module_context)

View File

@@ -1,7 +1,7 @@
from textwrap import dedent
from jedi._compatibility import builtins, is_py3
from jedi.parser import load_grammar
from jedi.parser.python import load_grammar
from jedi.evaluate import compiled, instance
from jedi.evaluate.representation import FunctionContext
from jedi.evaluate import Evaluator

View File

@@ -1,11 +1,11 @@
from jedi._compatibility import u
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser.python import parse
def test_basic_parsing():
def compare(string):
"""Generates the AST object and then regenerates the code."""
assert ParserWithRecovery(load_grammar(), string).module.get_code() == string
assert parse(string).get_code() == string
compare(u('\na #pass\n'))
compare(u('wblabla* 1\t\n'))

View File

@@ -4,10 +4,9 @@ import pytest
import jedi
from jedi import debug
from jedi._compatibility import u
from jedi.common import splitlines
from jedi import cache
from jedi.parser import load_grammar
from jedi.parser.python import load_grammar
from jedi.parser.diff import DiffParser
from jedi.parser import ParserWithRecovery
@@ -45,7 +44,7 @@ class Differ(object):
def initialize(self, source):
debug.dbg('differ: initialize', color='YELLOW')
grammar = load_grammar()
self.parser = ParserWithRecovery(grammar, u(source))
self.parser = ParserWithRecovery(grammar, source)
return self.parser.module
def parse(self, source, copies=0, parsers=0, expect_error_leaves=False):

View File

@@ -2,10 +2,9 @@ import difflib
import pytest
from jedi._compatibility import u
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser.python import parse
code_basic_features = u('''
code_basic_features = '''
"""A mod docstring"""
def a_function(a_argument, a_default = "default"):
@@ -22,7 +21,7 @@ to""" + "huhu"
return str(a_result)
else
return None
''')
'''
def diff_code_assert(a, b, n=4):
@@ -44,22 +43,22 @@ def diff_code_assert(a, b, n=4):
def test_basic_parsing():
"""Validate the parsing features"""
prs = ParserWithRecovery(load_grammar(), code_basic_features)
m = parse(code_basic_features)
diff_code_assert(
code_basic_features,
prs.module.get_code()
m.get_code()
)
def test_operators():
src = u('5 * 3')
prs = ParserWithRecovery(load_grammar(), src)
diff_code_assert(src, prs.module.get_code())
src = '5 * 3'
module = parse(src)
diff_code_assert(src, module.get_code())
def test_get_code():
"""Use the same code that the parser also generates, to compare"""
s = u('''"""a docstring"""
s = '''"""a docstring"""
class SomeClass(object, mixin):
def __init__(self):
self.xy = 3.0
@@ -81,8 +80,8 @@ class WithDocstring:
def method_with_docstring():
"""class docstr"""
pass
''')
assert ParserWithRecovery(load_grammar(), s).module.get_code() == s
'''
assert parse(s).get_code() == s
def test_end_newlines():
@@ -92,7 +91,7 @@ def test_end_newlines():
line the parser needs.
"""
def test(source, end_pos):
module = ParserWithRecovery(load_grammar(), u(source)).module
module = parse(source)
assert module.get_code() == source
assert module.end_pos == end_pos

View File

@@ -10,7 +10,7 @@ from textwrap import dedent
import jedi
from jedi._compatibility import u
from jedi.parser import load_grammar
from jedi.parser.python import load_grammar
from jedi.parser.diff import FastParser
from jedi.parser.utils import save_parser

View File

@@ -5,7 +5,7 @@ instead of simple parser objects.
from textwrap import dedent
from jedi.parser import Parser, load_grammar
from jedi.parser.python import parse
def assert_params(param_string, **wanted_dct):
@@ -14,12 +14,12 @@ def assert_params(param_string, **wanted_dct):
pass
''') % param_string
parser = Parser(load_grammar(), dedent(source))
funcdef = parser.get_parsed_node().subscopes[0]
module = parse(source)
funcdef = module.subscopes[0]
dct = dict((p.name.value, p.default and p.default.get_code())
for p in funcdef.params)
assert dct == wanted_dct
assert parser.get_parsed_node().get_code() == source
assert module.get_code() == source
def test_split_params_with_separation_star():

View File

@@ -4,18 +4,19 @@ from textwrap import dedent
import jedi
from jedi._compatibility import u, is_py3
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser import ParserWithRecovery
from jedi.parser.python import parse, load_grammar
from jedi.parser import tree as pt
def test_user_statement_on_import():
"""github #285"""
s = u("from datetime import (\n"
" time)")
s = "from datetime import (\n" \
" time)"
for pos in [(2, 1), (2, 4)]:
p = ParserWithRecovery(load_grammar(), s)
stmt = p.module.get_statement_for_position(pos)
p = parse(s)
stmt = p.get_statement_for_position(pos)
assert isinstance(stmt, pt.Import)
assert [str(n) for n in stmt.get_defined_names()] == ['time']
@@ -23,7 +24,7 @@ def test_user_statement_on_import():
class TestCallAndName():
def get_call(self, source):
# Get the simple_stmt and then the first one.
simple_stmt = ParserWithRecovery(load_grammar(), u(source)).module.children[0]
simple_stmt = parse(source).children[0]
return simple_stmt.children[0]
def test_name_and_call_positions(self):
@@ -58,7 +59,7 @@ class TestCallAndName():
class TestSubscopes():
def get_sub(self, source):
return ParserWithRecovery(load_grammar(), u(source)).module.subscopes[0]
return parse(source).subscopes[0]
def test_subscope_names(self):
name = self.get_sub('class Foo: pass').name
@@ -74,7 +75,7 @@ class TestSubscopes():
class TestImports():
def get_import(self, source):
return ParserWithRecovery(load_grammar(), source).module.imports[0]
return parse(source).imports[0]
def test_import_names(self):
imp = self.get_import(u('import math\n'))
@@ -103,25 +104,25 @@ def test_module():
def test_end_pos():
s = u(dedent('''
x = ['a', 'b', 'c']
def func():
y = None
'''))
parser = ParserWithRecovery(load_grammar(), s)
scope = parser.module.subscopes[0]
s = dedent('''
x = ['a', 'b', 'c']
def func():
y = None
''')
parser = parse(s)
scope = parser.subscopes[0]
assert scope.start_pos == (3, 0)
assert scope.end_pos == (5, 0)
def test_carriage_return_statements():
source = u(dedent('''
source = dedent('''
foo = 'ns1!'
# this is a namespace package
'''))
''')
source = source.replace('\n', '\r\n')
stmt = ParserWithRecovery(load_grammar(), source).module.statements[0]
stmt = parse(source).statements[0]
assert '#' not in stmt.get_code()
@@ -129,7 +130,7 @@ def test_incomplete_list_comprehension():
""" Shouldn't raise an error, same bug as #418. """
# With the old parser this actually returned a statement. With the new
# parser only valid statements generate one.
assert ParserWithRecovery(load_grammar(), u('(1 for def')).module.statements == []
assert parse('(1 for def').statements == []
def test_hex_values_in_docstring():
@@ -141,7 +142,7 @@ def test_hex_values_in_docstring():
return 1
'''
doc = ParserWithRecovery(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
doc = parse(source).subscopes[0].raw_doc
if is_py3:
assert doc == '\xff'
else:
@@ -160,7 +161,7 @@ def test_error_correction_with():
def test_newline_positions():
endmarker = ParserWithRecovery(load_grammar(), u('a\n')).module.children[-1]
endmarker = parse('a\n').children[-1]
assert endmarker.end_pos == (2, 0)
new_line = endmarker.get_previous_leaf()
assert new_line.start_pos == (1, 1)
@@ -173,8 +174,8 @@ def test_end_pos_error_correction():
grammar needs it. However, they are removed again. We still want the right
end_pos, even if something breaks in the parser (error correction).
"""
s = u('def x():\n .')
m = ParserWithRecovery(load_grammar(), s).module
s = 'def x():\n .'
m = parse(s)
func = m.children[0]
assert func.type == 'funcdef'
assert func.end_pos == (2, 2)
@@ -208,19 +209,17 @@ def test_unicode_string():
def test_backslash_dos_style():
grammar = load_grammar()
m = ParserWithRecovery(grammar, u('\\\r\n')).module
assert m
assert parse('\\\r\n')
def test_started_lambda_stmt():
p = ParserWithRecovery(load_grammar(), u'lambda a, b: a i')
assert p.get_parsed_node().children[0].type == 'error_node'
m = parse(u'lambda a, b: a i')
assert m.children[0].type == 'error_node'
def test_python2_octal():
parser = ParserWithRecovery(load_grammar(), u'0660')
first = parser.get_parsed_node().children[0]
module = parse('0660')
first = module.children[0]
if is_py3:
assert first.type == 'error_node'
else:
@@ -228,8 +227,7 @@ def test_python2_octal():
def test_python3_octal():
parser = ParserWithRecovery(load_grammar(), u'0o660')
module = parser.get_parsed_node()
module = parse('0o660')
if is_py3:
assert module.children[0].children[0].type == 'number'
else:

View File

@@ -5,7 +5,7 @@ from textwrap import dedent
import pytest
from jedi._compatibility import u, unicode
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser.python import parse
from jedi.parser import tree as pt
@@ -27,9 +27,9 @@ class TestsFunctionAndLambdaParsing(object):
@pytest.fixture(params=FIXTURES)
def node(self, request):
parsed = ParserWithRecovery(load_grammar(), dedent(u(request.param[0])))
parsed = parse(dedent(request.param[0]))
request.keywords['expected'] = request.param[1]
return parsed.module.subscopes[0]
return parsed.subscopes[0]
@pytest.fixture()
def expected(self, request, node):

View File

@@ -8,9 +8,9 @@ test_grammar.py files from both Python 2 and Python 3.
from textwrap import dedent
from jedi._compatibility import unicode, is_py3
from jedi.parser import Parser, load_grammar, ParseError
from jedi._compatibility import is_py3
from jedi.parser.python import parse as _parse, load_grammar
from jedi.parser import ParseError
import pytest
from test.helpers import TestCase
@@ -19,7 +19,7 @@ from test.helpers import TestCase
def parse(code, version='3.4'):
code = dedent(code) + "\n\n"
grammar = load_grammar(version=version)
return Parser(grammar, unicode(code), 'file_input').get_parsed_node()
return _parse(code, grammar, error_recovery=False)
class TestDriver(TestCase):

View File

@@ -2,9 +2,10 @@
from textwrap import dedent
from jedi._compatibility import u, is_py3, py_version
from jedi._compatibility import is_py3, py_version
from jedi.parser.token import NAME, OP, NEWLINE, STRING, INDENT, ERRORTOKEN, ENDMARKER
from jedi.parser import ParserWithRecovery, load_grammar, tokenize
from jedi.parser import tokenize
from jedi.parser.python import parse
from jedi.common import splitlines
from jedi.parser.tokenize import TokenInfo
@@ -17,25 +18,25 @@ def _get_token_list(string):
class TokenTest(unittest.TestCase):
def test_end_pos_one_line(self):
parsed = ParserWithRecovery(load_grammar(), dedent(u('''
parsed = parse(dedent('''
def testit():
a = "huhu"
''')))
tok = parsed.module.subscopes[0].statements[0].children[2]
'''))
tok = parsed.subscopes[0].statements[0].children[2]
assert tok.end_pos == (3, 14)
def test_end_pos_multi_line(self):
parsed = ParserWithRecovery(load_grammar(), dedent(u('''
parsed = parse(dedent('''
def testit():
a = """huhu
asdfasdf""" + "h"
''')))
tok = parsed.module.subscopes[0].statements[0].children[2].children[0]
'''))
tok = parsed.subscopes[0].statements[0].children[2].children[0]
assert tok.end_pos == (4, 11)
def test_simple_no_whitespace(self):
# Test a simple one line string, no preceding whitespace
simple_docstring = u('"""simple one line docstring"""')
simple_docstring = '"""simple one line docstring"""'
tokens = tokenize.source_tokens(simple_docstring)
token_list = list(tokens)
_, value, _, prefix = token_list[0]
@@ -44,7 +45,7 @@ class TokenTest(unittest.TestCase):
def test_simple_with_whitespace(self):
# Test a simple one line string with preceding whitespace and newline
simple_docstring = u(' """simple one line docstring""" \r\n')
simple_docstring = ' """simple one line docstring""" \r\n'
tokens = tokenize.source_tokens(simple_docstring)
token_list = list(tokens)
assert token_list[0][0] == INDENT
@@ -58,12 +59,12 @@ class TokenTest(unittest.TestCase):
def test_function_whitespace(self):
# Test function definition whitespace identification
fundef = dedent(u('''
fundef = dedent('''
def test_whitespace(*args, **kwargs):
x = 1
if x > 0:
print(True)
'''))
''')
tokens = tokenize.source_tokens(fundef)
token_list = list(tokens)
for _, value, _, prefix in token_list:
@@ -83,7 +84,7 @@ class TokenTest(unittest.TestCase):
def test_tokenize_multiline_I(self):
# Make sure multiline string having newlines have the end marker on the
# next line
fundef = u('''""""\n''')
fundef = '''""""\n'''
tokens = tokenize.source_tokens(fundef)
token_list = list(tokens)
assert token_list == [TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
@@ -92,7 +93,7 @@ class TokenTest(unittest.TestCase):
def test_tokenize_multiline_II(self):
# Make sure multiline string having no newlines have the end marker on
# same line
fundef = u('''""""''')
fundef = '''""""'''
tokens = tokenize.source_tokens(fundef)
token_list = list(tokens)
assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
@@ -101,17 +102,17 @@ class TokenTest(unittest.TestCase):
def test_tokenize_multiline_III(self):
# Make sure multiline string having newlines have the end marker on the
# next line even if several newline
fundef = u('''""""\n\n''')
fundef = '''""""\n\n'''
tokens = tokenize.source_tokens(fundef)
token_list = list(tokens)
assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
TokenInfo(ENDMARKER, '', (3, 0), '')]
def test_identifier_contains_unicode(self):
fundef = dedent(u('''
fundef = dedent('''
def 我あφ():
pass
'''))
''')
tokens = tokenize.source_tokens(fundef)
token_list = list(tokens)
unicode_token = token_list[1]
@@ -133,8 +134,8 @@ class TokenTest(unittest.TestCase):
]
for s in string_tokens:
parsed = ParserWithRecovery(load_grammar(), u('''a = %s\n''' % s))
simple_stmt = parsed.module.children[0]
module = parse('''a = %s\n''' % s)
simple_stmt = module.children[0]
expr_stmt = simple_stmt.children[0]
assert len(expr_stmt.children) == 3
string_tok = expr_stmt.children[2]

View File

@@ -7,16 +7,14 @@ import os
import sys
import textwrap
from .helpers import TestCase, cwd_at
import pytest
import jedi
from jedi._compatibility import u
from jedi import Script
from jedi import api
from jedi import common
from jedi.evaluate import imports
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser.python import parse
from .helpers import TestCase, cwd_at
#jedi.set_debug_function()
@@ -102,9 +100,9 @@ class TestRegression(TestCase):
def test_end_pos_line(self):
# jedi issue #150
s = u("x()\nx( )\nx( )\nx ( )")
parser = ParserWithRecovery(load_grammar(), s)
for i, s in enumerate(parser.module.statements):
s = "x()\nx( )\nx( )\nx ( )"
module = parse(s)
for i, s in enumerate(module.statements):
assert s.end_pos == (i + 1, i + 3)
def check_definition_by_marker(self, source, after_cursor, names):