Rework the parser so we can use arbitrary start nodes of the syntax.

This also includes a rework of error recovery in the parser. Error recovery is now only possible for file_input parsing, i.e. for full files. It also includes a refactoring of the tokenizer. We no longer have to add an additional newline, because the tokenizer now works correctly (which removes some confusion).
2015-12-20 22:25:41 +01:00
parent 9a93d599da
commit c4906e0e3f
22 changed files with 246 additions and 198 deletions
@@ -4,7 +4,7 @@ Python 2.X)
 """
 import jedi
 from jedi._compatibility import u
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from .. import helpers


@@ -12,7 +12,7 @@ def test_explicit_absolute_imports():
    """
    Detect modules with ``from __future__ import absolute_import``.
    """
-    parser = Parser(load_grammar(), u("from __future__ import absolute_import"), "test.py")
+    parser = ParserWithRecovery(load_grammar(), u("from __future__ import absolute_import"), "test.py")
    assert parser.module.has_explicit_absolute_import


@@ -20,7 +20,7 @@ def test_no_explicit_absolute_imports():
    """
     Detect modules without ``from __future__ import absolute_import``.
    """
-    parser = Parser(load_grammar(), u("1"), "test.py")
+    parser = ParserWithRecovery(load_grammar(), u("1"), "test.py")
    assert not parser.module.has_explicit_absolute_import


@@ -30,7 +30,7 @@ def test_dont_break_imports_without_namespaces():
    assume that all imports have non-``None`` namespaces.
    """
    src = u("from __future__ import absolute_import\nimport xyzzy")
-    parser = Parser(load_grammar(), src, "test.py")
+    parser = ParserWithRecovery(load_grammar(), src, "test.py")
    assert parser.module.has_explicit_absolute_import


@@ -35,3 +35,20 @@ def test_simple_annotations():
    annot('')""")

    assert [d.name for d in jedi.Script(source, ).goto_definitions()] == ['int']
+
+
+@pytest.mark.skipif('sys.version_info[0] < 3')
+@pytest.mark.parametrize('reference', [
+    'assert 1',
+    '1',
+    'lambda: 3',
+    'def x(): pass',
+    '1, 2',
+    r'1\n'
+])
+def test_illegal_forward_references(reference):
+    source = """
+    def foo(bar: "%s"):
+        bar""" % reference
+
+    assert not jedi.Script(source).goto_definitions()
@@ -7,7 +7,7 @@ from jedi.evaluate.sys_path import (_get_parent_dir_with_file,
                                    sys_path_with_modifications,
                                    _check_module)
 from jedi.evaluate import Evaluator
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar

 from ..helpers import cwd_at

@@ -37,7 +37,7 @@ def test_append_on_non_sys_path():
        d = Dummy()
        d.path.append('foo')"""))
    grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert len(paths) > 0
    assert 'foo' not in paths
@@ -48,7 +48,7 @@ def test_path_from_invalid_sys_path_assignment():
        import sys
        sys.path = 'invalid'"""))
    grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert len(paths) > 0
    assert 'invalid' not in paths
@@ -60,7 +60,7 @@ def test_sys_path_with_modifications():
        import os
    """))
    grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
    p.module.path = os.path.abspath(os.path.join(os.curdir, 'module_name.py'))
    paths = sys_path_with_modifications(Evaluator(grammar), p.module)
    assert '/tmp/.buildout/eggs/important_package.egg' in paths
@@ -83,7 +83,7 @@ def test_path_from_sys_path_assignment():
        if __name__ == '__main__':
            sys.exit(important_package.main())"""))
    grammar = load_grammar()
-    p = Parser(grammar, SRC)
+    p = ParserWithRecovery(grammar, SRC)
    paths = _check_module(Evaluator(grammar), p.module)
    assert 1 not in paths
    assert '/home/test/.buildout/eggs/important_package.egg' in paths
@@ -5,14 +5,14 @@ import sys
 import pytest

 from jedi._compatibility import unicode
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.evaluate import sys_path, Evaluator


 def test_paths_from_assignment():
    def paths(src):
        grammar = load_grammar()
-        stmt = Parser(grammar, unicode(src)).module.statements[0]
+        stmt = ParserWithRecovery(grammar, unicode(src)).module.statements[0]
        return set(sys_path._paths_from_assignment(Evaluator(grammar), stmt))

    assert paths('sys.path[0:0] = ["a"]') == set(['a'])
@@ -1,11 +1,11 @@
 from jedi._compatibility import u
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar


 def test_basic_parsing():
    def compare(string):
        """Generates the AST object and then regenerates the code."""
-        assert Parser(load_grammar(), string).module.get_code() == string
+        assert ParserWithRecovery(load_grammar(), string).module.get_code() == string

    compare(u('\na #pass\n'))
    compare(u('wblabla* 1\t\n'))
@@ -3,7 +3,7 @@ import difflib
 import pytest

 from jedi._compatibility import u
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar

 code_basic_features = u('''
 """A mod docstring"""
@@ -44,7 +44,7 @@ def diff_code_assert(a, b, n=4):
 def test_basic_parsing():
    """Validate the parsing features"""

-    prs = Parser(load_grammar(), code_basic_features)
+    prs = ParserWithRecovery(load_grammar(), code_basic_features)
    diff_code_assert(
        code_basic_features,
        prs.module.get_code()
@@ -53,7 +53,7 @@ def test_basic_parsing():

 def test_operators():
    src = u('5  * 3')
-    prs = Parser(load_grammar(), src)
+    prs = ParserWithRecovery(load_grammar(), src)
    diff_code_assert(src, prs.module.get_code())


@@ -82,7 +82,7 @@ def method_with_docstring():
    """class docstr"""
    pass
 ''')
-    assert Parser(load_grammar(), s).module.get_code() == s
+    assert ParserWithRecovery(load_grammar(), s).module.get_code() == s


 def test_end_newlines():
@@ -92,7 +92,7 @@ def test_end_newlines():
    line the parser needs.
    """
    def test(source, end_pos):
-        module = Parser(load_grammar(), u(source)).module
+        module = ParserWithRecovery(load_grammar(), u(source)).module
        assert module.get_code() == source
        assert module.end_pos == end_pos

@@ -3,7 +3,7 @@ import sys

 import jedi
 from jedi._compatibility import u, is_py3
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.parser.user_context import UserContextParser
 from jedi.parser import tree as pt
 from textwrap import dedent
@@ -23,7 +23,7 @@ def test_user_statement_on_import():
 class TestCallAndName():
    def get_call(self, source):
        # Get the simple_stmt and then the first one.
-        simple_stmt = Parser(load_grammar(), u(source)).module.children[0]
+        simple_stmt = ParserWithRecovery(load_grammar(), u(source)).module.children[0]
        return simple_stmt.children[0]

    def test_name_and_call_positions(self):
@@ -58,7 +58,7 @@ class TestCallAndName():

 class TestSubscopes():
    def get_sub(self, source):
-        return Parser(load_grammar(), u(source)).module.subscopes[0]
+        return ParserWithRecovery(load_grammar(), u(source)).module.subscopes[0]

    def test_subscope_names(self):
        name = self.get_sub('class Foo: pass').name
@@ -74,7 +74,7 @@ class TestSubscopes():

 class TestImports():
    def get_import(self, source):
-        return Parser(load_grammar(), source).module.imports[0]
+        return ParserWithRecovery(load_grammar(), source).module.imports[0]

    def test_import_names(self):
        imp = self.get_import(u('import math\n'))
@@ -89,13 +89,13 @@ class TestImports():


 def test_module():
-    module = Parser(load_grammar(), u('asdf'), 'example.py').module
+    module = ParserWithRecovery(load_grammar(), u('asdf'), 'example.py').module
    name = module.name
    assert str(name) == 'example'
    assert name.start_pos == (1, 0)
    assert name.end_pos == (1, 7)

-    module = Parser(load_grammar(), u('asdf')).module
+    module = ParserWithRecovery(load_grammar(), u('asdf')).module
    name = module.name
    assert str(name) == ''
    assert name.start_pos == (1, 0)
@@ -108,7 +108,7 @@ def test_end_pos():
                 def func():
                     y = None
                 '''))
-    parser = Parser(load_grammar(), s)
+    parser = ParserWithRecovery(load_grammar(), s)
    scope = parser.module.subscopes[0]
    assert scope.start_pos == (3, 0)
    assert scope.end_pos == (5, 0)
@@ -121,7 +121,7 @@ def test_carriage_return_statements():
        # this is a namespace package
    '''))
    source = source.replace('\n', '\r\n')
-    stmt = Parser(load_grammar(), source).module.statements[0]
+    stmt = ParserWithRecovery(load_grammar(), source).module.statements[0]
    assert '#' not in stmt.get_code()


@@ -129,7 +129,7 @@ def test_incomplete_list_comprehension():
    """ Shouldn't raise an error, same bug as #418. """
    # With the old parser this actually returned a statement. With the new
    # parser only valid statements generate one.
-    assert Parser(load_grammar(), u('(1 for def')).module.statements == []
+    assert ParserWithRecovery(load_grammar(), u('(1 for def')).module.statements == []


 def test_hex_values_in_docstring():
@@ -141,7 +141,7 @@ def test_hex_values_in_docstring():
            return 1
        '''

-    doc = Parser(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
+    doc = ParserWithRecovery(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
    if is_py3:
        assert doc == '\xff'
    else:
@@ -160,7 +160,7 @@ def test_error_correction_with():


 def test_newline_positions():
-    endmarker = Parser(load_grammar(), u('a\n')).module.children[-1]
+    endmarker = ParserWithRecovery(load_grammar(), u('a\n')).module.children[-1]
    assert endmarker.end_pos == (2, 0)
    new_line = endmarker.get_previous()
    assert new_line.start_pos == (1, 1)
@@ -174,7 +174,7 @@ def test_end_pos_error_correction():
    end_pos, even if something breaks in the parser (error correction).
    """
    s = u('def x():\n .')
-    m = Parser(load_grammar(), s).module
+    m = ParserWithRecovery(load_grammar(), s).module
    func = m.children[0]
    assert func.type == 'funcdef'
    # This is not exactly correct, but ok, because it doesn't make a difference
@@ -191,7 +191,7 @@ def test_param_splitting():
    def check(src, result):
        # Python 2 tuple params should be ignored for now.
        grammar = load_grammar('grammar%s.%s' % sys.version_info[:2])
-        m = Parser(grammar, u(src)).module
+        m = ParserWithRecovery(grammar, u(src)).module
        if is_py3:
            assert not m.subscopes
        else:
@@ -211,5 +211,5 @@ def test_unicode_string():

 def test_backslash_dos_style():
    grammar = load_grammar()
-    m = Parser(grammar, u('\\\r\n')).module
+    m = ParserWithRecovery(grammar, u('\\\r\n')).module
    assert m
@@ -5,7 +5,7 @@ from textwrap import dedent
 import pytest

 from jedi._compatibility import u, unicode
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar
 from jedi.parser import tree as pt


@@ -27,7 +27,7 @@ class TestsFunctionAndLambdaParsing(object):

    @pytest.fixture(params=FIXTURES)
    def node(self, request):
-        parsed = Parser(load_grammar(), dedent(u(request.param[0])))
+        parsed = ParserWithRecovery(load_grammar(), dedent(u(request.param[0])))
        request.keywords['expected'] = request.param[1]
        return parsed.module.subscopes[0]

@@ -7,7 +7,7 @@ import pytest

 from jedi._compatibility import u, is_py3
 from jedi.parser.token import NAME, OP, NEWLINE, STRING, INDENT
-from jedi.parser import Parser, load_grammar, tokenize
+from jedi.parser import ParserWithRecovery, load_grammar, tokenize


 from ..helpers import unittest
@@ -15,7 +15,7 @@ from ..helpers import unittest

 class TokenTest(unittest.TestCase):
    def test_end_pos_one_line(self):
-        parsed = Parser(load_grammar(), dedent(u('''
+        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
        def testit():
            a = "huhu"
        ''')))
@@ -23,7 +23,7 @@ class TokenTest(unittest.TestCase):
        assert tok.end_pos == (3, 14)

    def test_end_pos_multi_line(self):
-        parsed = Parser(load_grammar(), dedent(u('''
+        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
        def testit():
            a = """huhu
        asdfasdf""" + "h"
@@ -108,7 +108,7 @@ class TokenTest(unittest.TestCase):
        ]

        for s in string_tokens:
-            parsed = Parser(load_grammar(), u('''a = %s\n''' % s))
+            parsed = ParserWithRecovery(load_grammar(), u('''a = %s\n''' % s))
            simple_stmt = parsed.module.children[0]
            expr_stmt = simple_stmt.children[0]
            assert len(expr_stmt.children) == 3
@@ -15,7 +15,7 @@ from jedi._compatibility import u
 from jedi import Script
 from jedi import api
 from jedi.evaluate import imports
-from jedi.parser import Parser, load_grammar
+from jedi.parser import ParserWithRecovery, load_grammar

 #jedi.set_debug_function()

@@ -102,7 +102,7 @@ class TestRegression(TestCase):
    def test_end_pos_line(self):
        # jedi issue #150
        s = u("x()\nx( )\nx(  )\nx (  )")
-        parser = Parser(load_grammar(), s)
+        parser = ParserWithRecovery(load_grammar(), s)
        for i, s in enumerate(parser.module.statements):
            assert s.end_pos == (i + 1, i + 3)