mirror of
https://github.com/davidhalter/parso.git
synced 2026-01-24 03:52:10 +08:00
A test suite imported from jedi.
214
test/test_tokenize.py
Normal file
@@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-    # This file contains Unicode characters.

from textwrap import dedent

from jedi._compatibility import is_py3, py_version
from jedi.common import splitlines
from parso.token import NAME, OP, NEWLINE, STRING, INDENT, ERRORTOKEN, ENDMARKER
from parso import tokenize
from parso.python import parse
from parso.tokenize import TokenInfo

from .helpers import unittest


def _get_token_list(string):
    return list(tokenize.source_tokens(string))
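# NOTE: each token unpacks as (type, string, start_pos, prefix); the tests
# below rely on this 4-tuple shape throughout.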


class TokenTest(unittest.TestCase):
    def test_end_pos_one_line(self):
        parsed = parse(dedent('''
        def testit():
            a = "huhu"
        '''))
        simple_stmt = next(parsed.iter_funcdefs()).get_suite().children[-1]
        string = simple_stmt.children[0].get_rhs()
        assert string.end_pos == (3, 14)
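        # (3, 14): line 3 of the dedented source is '    a = "huhu"', and the
        # closing quote of "huhu" ends at column 14.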

    def test_end_pos_multi_line(self):
        parsed = parse(dedent('''
        def testit():
            a = """huhu
        asdfasdf""" + "h"
        '''))
        expr_stmt = next(parsed.iter_funcdefs()).get_suite().children[1].children[0]
        string_leaf = expr_stmt.get_rhs().children[0]
        assert string_leaf.end_pos == (4, 11)
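        # (4, 11): the string leaf ends on line 4 of the dedented source,
        # where 'asdfasdf"""' spans columns 0 to 11.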

    def test_simple_no_whitespace(self):
        # Test a simple one line string, no preceding whitespace
        simple_docstring = '"""simple one line docstring"""'
        tokens = tokenize.source_tokens(simple_docstring)
        token_list = list(tokens)
        _, value, _, prefix = token_list[0]
        assert prefix == ''
        assert value == '"""simple one line docstring"""'

    def test_simple_with_whitespace(self):
        # Test a simple one line string with preceding whitespace and newline
        simple_docstring = ' """simple one line docstring""" \r\n'
        tokens = tokenize.source_tokens(simple_docstring)
        token_list = list(tokens)
        assert token_list[0][0] == INDENT
        typ, value, start_pos, prefix = token_list[1]
        assert prefix == ' '
        assert value == '"""simple one line docstring"""'
        assert typ == STRING
        typ, value, start_pos, prefix = token_list[2]
        assert prefix == ' '
        assert typ == NEWLINE

    def test_function_whitespace(self):
        # Test function definition whitespace identification
        fundef = dedent('''
        def test_whitespace(*args, **kwargs):
            x = 1
            if x > 0:
                print(True)
        ''')
        tokens = tokenize.source_tokens(fundef)
        token_list = list(tokens)
        for _, value, _, prefix in token_list:
            if value == 'test_whitespace':
                assert prefix == ' '
            if value == '(':
                assert prefix == ''
            if value == '*':
                assert prefix == ''
            if value == '**':
                assert prefix == ' '
            if value == 'print':
                assert prefix == ' '
            if value == 'if':
                assert prefix == ' '

    def test_tokenize_multiline_I(self):
        # Make sure a multiline string with a trailing newline puts the end
        # marker on the next line.
        fundef = '''""""\n'''
        tokens = tokenize.source_tokens(fundef)
        token_list = list(tokens)
        assert token_list == [TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
                              TokenInfo(ENDMARKER, '', (2, 0), '')]

    def test_tokenize_multiline_II(self):
        # Make sure a multiline string without a trailing newline puts the
        # end marker on the same line.
        fundef = '''""""'''
        tokens = tokenize.source_tokens(fundef)
        token_list = list(tokens)
        assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
                              TokenInfo(ENDMARKER, '', (1, 4), '')]

    def test_tokenize_multiline_III(self):
        # Make sure a multiline string with trailing newlines puts the end
        # marker on the following line, even if there are several newlines.
        fundef = '''""""\n\n'''
        tokens = tokenize.source_tokens(fundef)
        token_list = list(tokens)
        assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
                              TokenInfo(ENDMARKER, '', (3, 0), '')]
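        # Taken together with the two tests above: the unterminated string is
        # consumed as a single ERRORTOKEN and the ENDMARKER starts wherever
        # that token ended, advancing one row per consumed newline.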

    def test_identifier_contains_unicode(self):
        fundef = dedent('''
        def 我あφ():
            pass
        ''')
        tokens = tokenize.source_tokens(fundef)
        token_list = list(tokens)
        unicode_token = token_list[1]
        if is_py3:
            assert unicode_token[0] == NAME
        else:
            # Unicode tokens in Python 2 seem to be identified as operators.
            # They will be ignored in the parser, that's ok.
            assert unicode_token[0] == OP

    def test_quoted_strings(self):
        string_tokens = [
            'u"test"',
            'u"""test"""',
            'U"""test"""',
            "u'''test'''",
            "U'''test'''",
        ]

        for s in string_tokens:
            module = parse('''a = %s\n''' % s)
            simple_stmt = module.children[0]
            expr_stmt = simple_stmt.children[0]
            assert len(expr_stmt.children) == 3
            string_tok = expr_stmt.children[2]
            assert string_tok.type == 'string'
            assert string_tok.value == s
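            # Tree shape: module -> simple_stmt -> expr_stmt, whose children
            # are [name 'a', operator '=', string literal]; hence the
            # length-3 check and the children[2] lookup above.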


def test_tokenizer_with_string_literal_backslash():
    import jedi
    c = jedi.Script("statement = u'foo\\\n'; statement").goto_definitions()
    assert c[0]._name._context.obj == 'foo'
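    # The embedded source contains a backslash-newline continuation inside
    # the u'' literal, which contributes nothing to the string's value, so
    # the definition resolves to plain 'foo'.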


def test_ur_literals():
    """
    Decided to parse `u''` literals regardless of Python version. This
    probably makes sense:

    - Python 3+ doesn't support it, but it doesn't hurt to parse it where it
      arguably should not be. While this is incorrect, it's just incorrect
      for one "old" and in the future not very important version.
    - All the other Python versions work very well with it.
    """
    def check(literal, is_literal=True):
        token_list = _get_token_list(literal)
        typ, result_literal, _, _ = token_list[0]
        if is_literal:
            assert typ == STRING
            assert result_literal == literal
        else:
            assert typ == NAME

    check('u""')
    check('ur""', is_literal=not is_py3)
    check('Ur""', is_literal=not is_py3)
    check('UR""', is_literal=not is_py3)
    check('bR""')
    # Starting with Python 3.3 this ordering is also possible, but we just
    # enable it for all versions. It doesn't hurt.
    check('Rb""')
    # Starting with Python 3.6, format strings were introduced.
    check('fr""', is_literal=py_version >= 36)
    check('rF""', is_literal=py_version >= 36)
    check('f""', is_literal=py_version >= 36)
    check('F""', is_literal=py_version >= 36)


def test_error_literal():
    error_token, endmarker = _get_token_list('"\n')
    assert error_token.type == tokenize.ERRORTOKEN
    assert error_token.prefix == ''
    assert error_token.string == '"\n'
    assert endmarker.type == tokenize.ENDMARKER
    assert endmarker.prefix == ''

    bracket, error_token, endmarker = _get_token_list('( """')
    assert error_token.type == tokenize.ERRORTOKEN
    assert error_token.prefix == ' '
    assert error_token.string == '"""'
    assert endmarker.type == tokenize.ENDMARKER
    assert endmarker.prefix == ''


def test_endmarker_end_pos():
    def check(code):
        tokens = _get_token_list(code)
        lines = splitlines(code)
        assert tokens[-1].end_pos == (len(lines), len(lines[-1]))
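    # splitlines keeps a trailing empty line when the code ends in a newline
    # (e.g. 'a\n' -> ['a', '']), so check('a\n') expects the ENDMARKER's
    # end_pos to be (2, 0).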

    check('#c')
    check('#c\n')
    check('a\n')
    check('a')
    check(r'a\\n')
    check('a\\')