Rework the parser so we can use arbitrary start nodes of the syntax.

This also includes a rework of error recovery in the parser. Error recovery is now only possible for file_input parsing, i.e. when parsing full files.
It also includes a refactoring of the tokenizer. We no longer have to add an additional newline, because the tokenizer now works correctly (which removes a certain amount of confusion).
This commit is contained in:
Dave Halter
2015-12-20 22:21:47 +01:00
parent 9a93d599da
commit c4906e0e3f
22 changed files with 246 additions and 198 deletions

View File

@@ -3,7 +3,7 @@ import sys
import jedi
from jedi._compatibility import u, is_py3
from jedi.parser import Parser, load_grammar
from jedi.parser import ParserWithRecovery, load_grammar
from jedi.parser.user_context import UserContextParser
from jedi.parser import tree as pt
from textwrap import dedent
@@ -23,7 +23,7 @@ def test_user_statement_on_import():
class TestCallAndName():
def get_call(self, source):
# Get the simple_stmt and then the first one.
simple_stmt = Parser(load_grammar(), u(source)).module.children[0]
simple_stmt = ParserWithRecovery(load_grammar(), u(source)).module.children[0]
return simple_stmt.children[0]
def test_name_and_call_positions(self):
@@ -58,7 +58,7 @@ class TestCallAndName():
class TestSubscopes():
def get_sub(self, source):
return Parser(load_grammar(), u(source)).module.subscopes[0]
return ParserWithRecovery(load_grammar(), u(source)).module.subscopes[0]
def test_subscope_names(self):
name = self.get_sub('class Foo: pass').name
@@ -74,7 +74,7 @@ class TestSubscopes():
class TestImports():
def get_import(self, source):
return Parser(load_grammar(), source).module.imports[0]
return ParserWithRecovery(load_grammar(), source).module.imports[0]
def test_import_names(self):
imp = self.get_import(u('import math\n'))
@@ -89,13 +89,13 @@ class TestImports():
def test_module():
module = Parser(load_grammar(), u('asdf'), 'example.py').module
module = ParserWithRecovery(load_grammar(), u('asdf'), 'example.py').module
name = module.name
assert str(name) == 'example'
assert name.start_pos == (1, 0)
assert name.end_pos == (1, 7)
module = Parser(load_grammar(), u('asdf')).module
module = ParserWithRecovery(load_grammar(), u('asdf')).module
name = module.name
assert str(name) == ''
assert name.start_pos == (1, 0)
@@ -108,7 +108,7 @@ def test_end_pos():
def func():
y = None
'''))
parser = Parser(load_grammar(), s)
parser = ParserWithRecovery(load_grammar(), s)
scope = parser.module.subscopes[0]
assert scope.start_pos == (3, 0)
assert scope.end_pos == (5, 0)
@@ -121,7 +121,7 @@ def test_carriage_return_statements():
# this is a namespace package
'''))
source = source.replace('\n', '\r\n')
stmt = Parser(load_grammar(), source).module.statements[0]
stmt = ParserWithRecovery(load_grammar(), source).module.statements[0]
assert '#' not in stmt.get_code()
@@ -129,7 +129,7 @@ def test_incomplete_list_comprehension():
""" Shouldn't raise an error, same bug as #418. """
# With the old parser this actually returned a statement. With the new
# parser only valid statements generate one.
assert Parser(load_grammar(), u('(1 for def')).module.statements == []
assert ParserWithRecovery(load_grammar(), u('(1 for def')).module.statements == []
def test_hex_values_in_docstring():
@@ -141,7 +141,7 @@ def test_hex_values_in_docstring():
return 1
'''
doc = Parser(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
doc = ParserWithRecovery(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
if is_py3:
assert doc == '\xff'
else:
@@ -160,7 +160,7 @@ def test_error_correction_with():
def test_newline_positions():
endmarker = Parser(load_grammar(), u('a\n')).module.children[-1]
endmarker = ParserWithRecovery(load_grammar(), u('a\n')).module.children[-1]
assert endmarker.end_pos == (2, 0)
new_line = endmarker.get_previous()
assert new_line.start_pos == (1, 1)
@@ -174,7 +174,7 @@ def test_end_pos_error_correction():
end_pos, even if something breaks in the parser (error correction).
"""
s = u('def x():\n .')
m = Parser(load_grammar(), s).module
m = ParserWithRecovery(load_grammar(), s).module
func = m.children[0]
assert func.type == 'funcdef'
# This is not exactly correct, but ok, because it doesn't make a difference
@@ -191,7 +191,7 @@ def test_param_splitting():
def check(src, result):
# Python 2 tuple params should be ignored for now.
grammar = load_grammar('grammar%s.%s' % sys.version_info[:2])
m = Parser(grammar, u(src)).module
m = ParserWithRecovery(grammar, u(src)).module
if is_py3:
assert not m.subscopes
else:
@@ -211,5 +211,5 @@ def test_unicode_string():
def test_backslash_dos_style():
grammar = load_grammar()
m = Parser(grammar, u('\\\r\n')).module
m = ParserWithRecovery(grammar, u('\\\r\n')).module
assert m