Merge branch 'dev' into unicode_tokenize_fix2

Conflicts: AUTHORS.txt
2015-03-06 12:14:38 +04:00
parent f9c104348e a6c5d9f0a6
commit 32081bd156
105 changed files with 7728 additions and 6520 deletions
--- a/test/test_parser/test_fast_parser.py
+++ b/test/test_parser/test_fast_parser.py
@@ -3,6 +3,7 @@ from textwrap import dedent
 import jedi
 from jedi._compatibility import u
 from jedi import cache
+from jedi.parser import load_grammar
 from jedi.parser.fast import FastParser


@@ -12,15 +13,15 @@ def test_add_to_end():
    help of caches, this is an example that didn't work.
    """

-    a = """
-class Abc():
-    def abc(self):
-        self.x = 3
+    a = dedent("""
+    class Abc():
+        def abc(self):
+            self.x = 3

-class Two(Abc):
-    def h(self):
-        self
-"""  #      ^ here is the first completion
+    class Two(Abc):
+        def h(self):
+            self
+    """)      # ^ here is the first completion

    b = "    def g(self):\n" \
        "        self."
@@ -54,30 +55,369 @@ def test_carriage_return_splitting():
            pass
        '''))
    source = source.replace('\n', '\r\n')
-    p = FastParser(source)
-    assert [str(n) for n in p.module.get_defined_names()] == ['Foo']
+    p = FastParser(load_grammar(), source)
+    assert [n.value for lst in p.module.names_dict.values() for n in lst] == ['Foo']
+
+
+def test_split_parts():
+    cache.parser_cache.pop(None, None)
+
+    def splits(source):
+        class Mock(FastParser):
+            def __init__(self, *args):
+                self.number_of_splits = 0
+
+        return tuple(FastParser._split_parts(Mock(None, None), source))
+
+    def test(*parts):
+        assert splits(''.join(parts)) == parts
+
+    test('a\n\n', 'def b(): pass\n', 'c\n')
+    test('a\n', 'def b():\n pass\n', 'c\n')
+
+
+def check_fp(src, number_parsers_used, number_of_splits=None, number_of_misses=0):
+    if number_of_splits is None:
+        number_of_splits = number_parsers_used
+
+    p = FastParser(load_grammar(), u(src))
+    cache.save_parser(None, None, p, pickling=False)
+
+    # TODO Don't change get_code, the whole thing should be the same.
+    # -> Need to refactor the parser first, though.
+    assert src == p.module.get_code()
+    assert p.number_of_splits == number_of_splits
+    assert p.number_parsers_used == number_parsers_used
+    assert p.number_of_misses == number_of_misses
+    return p.module


 def test_change_and_undo():
-
-    def fp(src):
-        p = FastParser(u(src))
-        cache.save_parser(None, None, p, pickling=False)
-
-        # TODO Don't change get_code, the whole thing should be the same.
-        # -> Need to refactor the parser first, though.
-        assert src == p.module.get_code()[:-1]
-
+    # Empty the parser cache for the path None.
    cache.parser_cache.pop(None, None)
    func_before = 'def func():\n    pass\n'
-    fp(func_before + 'a')
-    fp(func_before + 'b')
-    fp(func_before + 'a')
+    # Parse the function and a.
+    check_fp(func_before + 'a', 2)
+    # Parse just b.
+    check_fp(func_before + 'b', 1, 2)
+    # b has changed to a again, so parse that.
+    check_fp(func_before + 'a', 1, 2)
+    # Same source as before, so no parsers should be used.
+    check_fp(func_before + 'a', 0, 2)

+    # Getting rid of an old parser: Still no parsers used.
+    check_fp('a', 0, 1)
+    # Now the file has completely changed and we need to parse.
+    check_fp('b', 1, 1)
+    # And again.
+    check_fp('a', 1, 1)
+
+
+def test_positions():
+    # Empty the parser cache for the path None.
    cache.parser_cache.pop(None, None)
-    fp('a')
-    fp('b')
-    fp('a')
+
+    func_before = 'class A:\n pass\n'
+    m = check_fp(func_before + 'a', 2)
+    assert m.start_pos == (1, 0)
+    assert m.end_pos == (3, 1)
+
+    m = check_fp('a', 0, 1)
+    assert m.start_pos == (1, 0)
+    assert m.end_pos == (1, 1)
+
+
+def test_if():
+    src = dedent('''\
+    def func():
+        x = 3
+        if x:
+            def y():
+                return x
+        return y()
+
+    func()
+    ''')
+
+    # Two parsers needed, one for pass and one for the function.
+    check_fp(src, 2)
+    assert [d.name for d in jedi.Script(src, 8, 6).goto_definitions()] == ['int']
+
+
+def test_if_simple():
+    src = dedent('''\
+    if 1:
+        a = 3
+    ''')
+    check_fp(src + 'a', 1)
+    check_fp(src + "else:\n    a = ''\na", 1)
+
+
+def test_for():
+    src = dedent("""\
+    for a in [1,2]:
+        a
+
+    for a1 in 1,"":
+        a1
+    """)
+    check_fp(src, 1)
+
+
+def test_class_with_class_var():
+    src = dedent("""\
+    class SuperClass:
+        class_super = 3
+        def __init__(self):
+            self.foo = 4
+    pass
+    """)
+    check_fp(src, 3)
+
+
+def test_func_with_if():
+    src = dedent("""\
+    def recursion(a):
+        if foo:
+            return recursion(a)
+        else:
+            if bar:
+                return inexistent
+            else:
+                return a
+    """)
+    check_fp(src, 1)
+
+
+def test_decorator():
+    src = dedent("""\
+    class Decorator():
+        @memoize
+        def dec(self, a):
+            return a
+    """)
+    check_fp(src, 2)
+
+
+def test_nested_funcs():
+    src = dedent("""\
+    def memoize(func):
+        def wrapper(*args, **kwargs):
+            return func(*args, **kwargs)
+        return wrapper
+    """)
+    check_fp(src, 3)
+
+
+def test_class_and_if():
+    src = dedent("""\
+    class V:
+        def __init__(self):
+            pass
+
+        if 1:
+            c = 3
+
+    def a_func():
+        return 1
+
+    # COMMENT
+    a_func()""")
+    check_fp(src, 5, 5)
+    assert [d.name for d in jedi.Script(src).goto_definitions()] == ['int']
+
+
+def test_func_with_for_and_comment():
+    # The first newline is important, leave it. It should not trigger another
+    # parser split.
+    src = dedent("""\
+
+    def func():
+        pass
+
+
+    for a in [1]:
+        # COMMENT
+        a""")
+    check_fp(src, 2)
+    # We don't need to parse the for loop, but we need to parse the other two,
+    # because the split is in a different place.
+    check_fp('a\n' + src, 2, 3)
+
+
+def test_multi_line_params():
+    src = dedent("""\
+    def x(a,
+          b):
+        pass
+
+    foo = 1
+    """)
+    check_fp(src, 2)
+
+
+def test_one_statement_func():
+    src = dedent("""\
+    first
+    def func(): a
+    """)
+    check_fp(src + 'second', 3)
+    # Empty the parser cache, because we're not interested in modifications
+    # here.
+    cache.parser_cache.pop(None, None)
+    check_fp(src + 'def second():\n a', 3)
+
+
+def test_class_func_if():
+    src = dedent("""\
+    class Class:
+        def func(self):
+            if 1:
+                a
+            else:
+                b
+
+    pass
+    """)
+    check_fp(src, 3)
+
+
+def test_for_on_one_line():
+    src = dedent("""\
+    foo = 1
+    for x in foo: pass
+
+    def hi():
+        pass
+    """)
+    check_fp(src, 2)
+
+    src = dedent("""\
+    def hi():
+        for x in foo: pass
+        pass
+
+    pass
+    """)
+    check_fp(src, 2)
+
+    src = dedent("""\
+    def hi():
+        for x in foo: pass
+
+        def nested():
+            pass
+    """)
+    check_fp(src, 2)
+
+
+def test_multi_line_for():
+    src = dedent("""\
+    for x in [1,
+              2]:
+        pass
+
+    pass
+    """)
+    check_fp(src, 1)
+
+
+def test_wrong_indentation():
+    src = dedent("""\
+    def func():
+        a
+         b
+        a
+    """)
+    check_fp(src, 1)
+
+    src = dedent("""\
+    def complex():
+        def nested():
+            a
+             b
+            a
+
+        def other():
+            pass
+    """)
+    check_fp(src, 3)
+
+
+def test_open_parentheses():
+    func = 'def func():\n a'
+    p = FastParser(load_grammar(), u('isinstance(\n\n' + func))
+    # As you can see, the isinstance call cannot be seen anymore after
+    # get_code, because it isn't valid code.
+    assert p.module.get_code() == '\n\n' + func
+    assert p.number_of_splits == 2
+    assert p.number_parsers_used == 2
+    cache.save_parser(None, None, p, pickling=False)
+
+    # Now with a correct parser it should work perfectly well.
+    check_fp('isinstance()\n' + func, 1, 2)
+
+
+def test_strange_parentheses():
+    src = dedent("""
+    class X():
+        a = (1
+    if 1 else 2)
+        def x():
+            pass
+    """)
+    check_fp(src, 2)
+
+
+def test_backslash():
+    src = dedent(r"""
+    a = 1\
+        if 1 else 2
+    def x():
+        pass
+    """)
+    check_fp(src, 2)
+
+    src = dedent(r"""
+    def x():
+        a = 1\
+    if 1 else 2
+        def y():
+            pass
+    """)
+    # The dangling if leads to not splitting where we theoretically could
+    # split.
+    check_fp(src, 2)
+
+    src = dedent(r"""
+    def first():
+        if foo \
+                and bar \
+                or baz:
+            pass
+    def second():
+        pass
+    """)
+    check_fp(src, 2)
+
+
+
+def test_fake_parentheses():
+    """
+    The fast parser splitting counts parentheses, but not as correct tokens.
+    Therefore parentheses in string tokens are included as well. This needs to
+    be accounted for.
+    """
+    src = dedent(r"""
+    def x():
+        a = (')'
+    if 1 else 2)
+        def y():
+            pass
+        def z():
+            pass
+    """)
+    check_fp(src, 3, 2, 1)


 def test_incomplete_function():
--- a/test/test_parser/test_get_code.py
+++ b/test/test_parser/test_get_code.py
@@ -3,7 +3,7 @@ import difflib
 import pytest

 from jedi._compatibility import u
-from jedi.parser import Parser
+from jedi.parser import Parser, load_grammar

 code_basic_features = u('''
 """A mod docstring"""
@@ -44,21 +44,19 @@ def diff_code_assert(a, b, n=4):
 def test_basic_parsing():
    """Validate the parsing features"""

-    prs = Parser(code_basic_features)
+    prs = Parser(load_grammar(), code_basic_features)
    diff_code_assert(
        code_basic_features,
-        prs.module.get_code2()
+        prs.module.get_code()
    )


-@pytest.mark.skipif('True', reason='Not yet working.')
 def test_operators():
    src = u('5  * 3')
-    prs = Parser(src)
+    prs = Parser(load_grammar(), src)
    diff_code_assert(src, prs.module.get_code())


-@pytest.mark.skipif('True', reason='Broke get_code support for yield/return statements.')
 def test_get_code():
    """Use the same code that the parser also generates, to compare"""
    s = u('''"""a docstring"""
@@ -84,4 +82,24 @@ def method_with_docstring():
    """class docstr"""
    pass
 ''')
-    assert Parser(s).module.get_code() == s
+    assert Parser(load_grammar(), s).module.get_code() == s
+
+
+def test_end_newlines():
+    """
+    The Python grammar explicitly needs a newline at the end. Jedi, though,
+    still wants to be able to return the exact same code without the
+    additional newline the parser needs.
+    """
+    def test(source, end_pos):
+        module = Parser(load_grammar(), u(source)).module
+        assert module.get_code() == source
+        assert module.end_pos == end_pos
+
+    test('a', (1, 1))
+    test('a\n', (2, 0))
+    test('a\nb', (2, 1))
+    test('a\n#comment\n', (3, 0))
+    test('a\n#comment', (2, 8))
+    test('a#comment', (1, 9))
+    test('def a():\n pass', (2, 5))
--- a/test/test_parser/test_parser.py
+++ b/test/test_parser/test_parser.py
@@ -1,9 +1,10 @@
 # -*- coding: utf-8 -*-

+import jedi
 from jedi._compatibility import u, is_py3
-from jedi.parser import Parser
+from jedi.parser import Parser, load_grammar
 from jedi.parser.user_context import UserContextParser
-from jedi.parser import representation as pr
+from jedi.parser import tree as pt
 from textwrap import dedent


@@ -13,50 +14,50 @@ def test_user_statement_on_import():
          "    time)")

    for pos in [(2, 1), (2, 4)]:
-        p = UserContextParser(s, None, pos, None).user_stmt()
-        assert isinstance(p, pr.Import)
-        assert p.defunct is False
+        p = UserContextParser(load_grammar(), s, None, pos, None).user_stmt()
+        assert isinstance(p, pt.Import)
        assert [str(n) for n in p.get_defined_names()] == ['time']


 class TestCallAndName():
    def get_call(self, source):
-        stmt = Parser(u(source), no_docstr=True).module.statements[0]
-        return stmt.expression_list()[0]
+        # Get the simple_stmt and then the first one.
+        simple_stmt = Parser(load_grammar(), u(source)).module.children[0]
+        return simple_stmt.children[0]

    def test_name_and_call_positions(self):
-        call = self.get_call('name\nsomething_else')
-        assert str(call.name) == 'name'
-        assert call.name.start_pos == call.start_pos == (1, 0)
-        assert call.name.end_pos == call.end_pos == (1, 4)
+        name = self.get_call('name\nsomething_else')
+        assert str(name) == 'name'
+        assert name.start_pos == (1, 0)
+        assert name.end_pos == (1, 4)

-        call = self.get_call('1.0\n')
-        assert call.value == 1.0
-        assert call.start_pos == (1, 0)
-        assert call.end_pos == (1, 3)
+        leaf = self.get_call('1.0\n')
+        assert leaf.value == '1.0'
+        assert leaf.eval() == 1.0
+        assert leaf.start_pos == (1, 0)
+        assert leaf.end_pos == (1, 3)

    def test_call_type(self):
        call = self.get_call('hello')
-        assert isinstance(call, pr.Call)
-        assert type(call.name) == pr.Name
+        assert isinstance(call, pt.Name)

    def test_literal_type(self):
        literal = self.get_call('1.0')
-        assert isinstance(literal, pr.Literal)
-        assert type(literal.value) == float
+        assert isinstance(literal, pt.Literal)
+        assert type(literal.eval()) == float

        literal = self.get_call('1')
-        assert isinstance(literal, pr.Literal)
-        assert type(literal.value) == int
+        assert isinstance(literal, pt.Literal)
+        assert type(literal.eval()) == int

        literal = self.get_call('"hello"')
-        assert isinstance(literal, pr.Literal)
-        assert literal.value == 'hello'
+        assert isinstance(literal, pt.Literal)
+        assert literal.eval() == 'hello'


 class TestSubscopes():
    def get_sub(self, source):
-        return Parser(u(source)).module.subscopes[0]
+        return Parser(load_grammar(), u(source)).module.subscopes[0]

    def test_subscope_names(self):
        name = self.get_sub('class Foo: pass').name
@@ -72,7 +73,7 @@ class TestSubscopes():

 class TestImports():
    def get_import(self, source):
-        return Parser(source).module.imports[0]
+        return Parser(load_grammar(), source).module.imports[0]

    def test_import_names(self):
        imp = self.get_import(u('import math\n'))
@@ -87,13 +88,13 @@ class TestImports():


 def test_module():
-    module = Parser(u('asdf'), 'example.py', no_docstr=True).module
+    module = Parser(load_grammar(), u('asdf'), 'example.py').module
    name = module.name
    assert str(name) == 'example'
    assert name.start_pos == (1, 0)
    assert name.end_pos == (1, 7)

-    module = Parser(u('asdf'), no_docstr=True).module
+    module = Parser(load_grammar(), u('asdf')).module
    name = module.name
    assert str(name) == ''
    assert name.start_pos == (1, 0)
@@ -106,7 +107,7 @@ def test_end_pos():
                 def func():
                     y = None
                 '''))
-    parser = Parser(s)
+    parser = Parser(load_grammar(), s)
    scope = parser.module.subscopes[0]
    assert scope.start_pos == (3, 0)
    assert scope.end_pos == (5, 0)
@@ -119,14 +120,15 @@ def test_carriage_return_statements():
        # this is a namespace package
    '''))
    source = source.replace('\n', '\r\n')
-    stmt = Parser(source).module.statements[0]
+    stmt = Parser(load_grammar(), source).module.statements[0]
    assert '#' not in stmt.get_code()


 def test_incomplete_list_comprehension():
    """ Shouldn't raise an error, same bug as #418. """
-    s = Parser(u('(1 for def')).module.statements[0]
-    assert s.expression_list()
+    # With the old parser this actually returned a statement. With the new
+    # parser only valid statements generate one.
+    assert Parser(load_grammar(), u('(1 for def')).module.statements == []


 def test_hex_values_in_docstring():
@@ -138,8 +140,43 @@ def test_hex_values_in_docstring():
            return 1
        '''

-    doc = Parser(dedent(u(source))).module.subscopes[0].raw_doc
+    doc = Parser(load_grammar(), dedent(u(source))).module.subscopes[0].raw_doc
    if is_py3:
        assert doc == '\xff'
    else:
        assert doc == u('<EFBFBD>')
+
+
+def test_error_correction_with():
+    source = """
+    with open() as f:
+        try:
+            f."""
+    comps = jedi.Script(source).completions()
+    assert len(comps) > 30
+    # `open` completions have a closed attribute.
+    assert [1 for c in comps if c.name == 'closed']
+
+
+def test_newline_positions():
+    endmarker = Parser(load_grammar(), u('a\n')).module.children[-1]
+    assert endmarker.end_pos == (2, 0)
+    new_line = endmarker.get_previous()
+    assert new_line.start_pos == (1, 1)
+    assert new_line.end_pos == (2, 0)
+
+
+def test_end_pos_error_correction():
+    """
+    Source code without an ending newline is given one, because the Python
+    grammar needs it. However, it is removed again. We still want the right
+    end_pos, even if something breaks in the parser (error correction).
+    """
+    s = u('def x():\n .')
+    m = Parser(load_grammar(), s).module
+    func = m.children[0]
+    assert func.type == 'funcdef'
+    # This is not exactly correct, but ok, because it doesn't make a difference
+    # at all. We just want to make sure that the module end_pos is correct!
+    assert func.end_pos == (3, 0)
+    assert m.end_pos == (2, 2)
--- a/test/test_parser/test_representation.py
+++ b/test/test_parser/test_representation.py
@@ -1,16 +0,0 @@
-from jedi.parser import Parser
-from jedi.parser import representation as pr
-from jedi._compatibility import u
-
-import pytest
-
-
-def test_import_is_nested():
-    imp = Parser(u('import ')).module.imports[0]
-    # should not raise an error, even if it's not a complete import
-    assert not imp.is_nested()
-
-
-@pytest.mark.skipif('True', 'Reenable this later, module should also have a scope_names_generator.')
-def test_module_scope_name_generator():
-    assert pr.Module().scope_names_generator()
--- a/test/test_parser/test_tokenize.py
+++ b/test/test_parser/test_tokenize.py
@@ -1,27 +1,101 @@
-from jedi._compatibility import u
+# -*- coding: utf-8    # This file contains Unicode characters.
+
+from io import StringIO
+from token import NEWLINE, STRING, INDENT
+
+from jedi._compatibility import u, is_py3
+from jedi.parser.token import NAME
 from jedi import parser
 from token import STRING

+
 from ..helpers import unittest


 class TokenTest(unittest.TestCase):
    def test_end_pos_one_line(self):
-        parsed = parser.Parser(u('''
+        parsed = parser.Parser(parser.load_grammar(), u('''
 def testit():
    a = "huhu"
 '''))
-        tok = parsed.module.subscopes[0].statements[0]._token_list[2]
-        self.assertEqual(tok.end_pos, (3, 14))
+        tok = parsed.module.subscopes[0].statements[0].children[2]
+        assert tok.end_pos == (3, 14)

    def test_end_pos_multi_line(self):
-        parsed = parser.Parser(u('''
+        parsed = parser.Parser(parser.load_grammar(), u('''
 def testit():
    a = """huhu
 asdfasdf""" + "h"
 '''))
-        tok = parsed.module.subscopes[0].statements[0]._token_list[2]
-        self.assertEqual(tok.end_pos, (4, 11))
+        tok = parsed.module.subscopes[0].statements[0].children[2].children[0]
+        assert tok.end_pos == (4, 11)
+
+    def test_simple_no_whitespace(self):
+        # Test a simple one line string, no preceding whitespace
+        simple_docstring = u('"""simple one line docstring"""')
+        simple_docstring_io = StringIO(simple_docstring)
+        tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline)
+        token_list = list(tokens)
+        _, value, _, prefix = token_list[0]
+        assert prefix == ''
+        assert value == '"""simple one line docstring"""'
+
+    def test_simple_with_whitespace(self):
+        # Test a simple one line string with preceding whitespace and newline
+        simple_docstring = u('  """simple one line docstring""" \r\n')
+        simple_docstring_io = StringIO(simple_docstring)
+        tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline)
+        token_list = list(tokens)
+        assert token_list[0][0] == INDENT
+        typ, value, start_pos, prefix = token_list[1]
+        assert prefix == '  '
+        assert value == '"""simple one line docstring"""'
+        assert typ == STRING
+        typ, value, start_pos, prefix = token_list[2]
+        assert prefix == ' '
+        assert typ == NEWLINE
+
+    def test_function_whitespace(self):
+        # Test function definition whitespace identification
+        fundef = u('''def test_whitespace(*args, **kwargs):
+    x = 1
+    if x > 0:
+        print(True)
+''')
+        fundef_io = StringIO(fundef)
+        tokens = parser.tokenize.generate_tokens(fundef_io.readline)
+        token_list = list(tokens)
+        for _, value, _, prefix in token_list:
+            if value == 'test_whitespace':
+                assert prefix == ' '
+            if value == '(':
+                assert prefix == ''
+            if value == '*':
+                assert prefix == ''
+            if value == '**':
+                assert prefix == ' '
+            if value == 'print':
+                assert prefix == '        '
+            if value == 'if':
+                assert prefix == '    '
+
+    def test_identifier_contains_unicode(self):
+        fundef = u('''
+def 我あφ():
+    pass
+''')
+        fundef_io = StringIO(fundef)
+        if is_py3:
+            tokens = parser.tokenize.generate_tokens(fundef_io.readline)
+            token_list = list(tokens)
+            identifier_token = next(
+                (token for token in token_list if token[1] == '我あφ'),
+                None
+            )
+            self.assertIsNotNone(identifier_token)
+            assert identifier_token[0] == NAME
+        else:
+            pass

    def test_quoted_strings(self):

@@ -45,4 +119,4 @@ asdfasdf""" + "h"
 def test_tokenizer_with_string_literal_backslash():
    import jedi
    c = jedi.Script("statement = u'foo\\\n'; statement").goto_definitions()
-    assert c[0]._name.parent.parent.obj == 'foo'
+    assert c[0]._name.parent.obj == 'foo'