# -*- coding: utf-8 -*-
# This file contains Unicode characters.
from io import StringIO
from textwrap import dedent

import pytest

from jedi._compatibility import u, is_py3, py_version
from jedi.parser.token import NAME, OP, NEWLINE, STRING, INDENT
from jedi.parser import ParserWithRecovery, load_grammar, tokenize
from ..helpers import unittest


class TokenTest(unittest.TestCase):
    def test_end_pos_one_line(self):
        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
        def testit():
            a = "huhu"
        ''')))
        tok = parsed.module.subscopes[0].statements[0].children[2]
        assert tok.end_pos == (3, 14)

    def test_end_pos_multi_line(self):
        parsed = ParserWithRecovery(load_grammar(), dedent(u('''
        def testit():
            a = """huhu
        asdfasdf""" + "h"
        ''')))
        tok = parsed.module.subscopes[0].statements[0].children[2].children[0]
        assert tok.end_pos == (4, 11)

    def test_simple_no_whitespace(self):
        # Test a simple one line string, no preceding whitespace.
        simple_docstring = u('"""simple one line docstring"""')
        simple_docstring_io = StringIO(simple_docstring)
        tokens = tokenize.generate_tokens(simple_docstring_io.readline)
        token_list = list(tokens)
        _, value, _, prefix = token_list[0]
        assert prefix == ''
        assert value == '"""simple one line docstring"""'

    def test_simple_with_whitespace(self):
        # Test a simple one line string with preceding whitespace and newline.
        simple_docstring = u(' """simple one line docstring""" \r\n')
        simple_docstring_io = StringIO(simple_docstring)
        tokens = tokenize.generate_tokens(simple_docstring_io.readline)
        token_list = list(tokens)
        assert token_list[0][0] == INDENT
        typ, value, start_pos, prefix = token_list[1]
        assert prefix == ' '
        assert value == '"""simple one line docstring"""'
        assert typ == STRING
        typ, value, start_pos, prefix = token_list[2]
        assert prefix == ' '
        assert typ == NEWLINE

    def test_function_whitespace(self):
        # Test function definition whitespace identification.
        fundef = dedent(u('''
        def test_whitespace(*args, **kwargs):
            x = 1
            if x > 0:
                print(True)
        '''))
        fundef_io = StringIO(fundef)
        tokens = tokenize.generate_tokens(fundef_io.readline)
        token_list = list(tokens)
        for _, value, _, prefix in token_list:
            if value == 'test_whitespace':
                assert prefix == ' '
            if value == '(':
                assert prefix == ''
            if value == '*':
                assert prefix == ''
            if value == '**':
                assert prefix == ' '
            if value == 'print':
                assert prefix == '        '
            if value == 'if':
                assert prefix == '    '

    def test_identifier_contains_unicode(self):
        fundef = dedent(u('''
        def 我あφ():
            pass
        '''))
        fundef_io = StringIO(fundef)
        tokens = tokenize.generate_tokens(fundef_io.readline)
        token_list = list(tokens)
        unicode_token = token_list[1]
        if is_py3:
            assert unicode_token[0] == NAME
        else:
            # Unicode tokens in Python 2 seem to be identified as operators.
            # They will be ignored in the parser, that's ok.
            assert unicode_token[0] == OP

    def test_quoted_strings(self):
        string_tokens = [
            'u"test"',
            'u"""test"""',
            'U"""test"""',
            "u'''test'''",
            "U'''test'''",
        ]
        for s in string_tokens:
            parsed = ParserWithRecovery(load_grammar(), u('''a = %s\n''' % s))
            simple_stmt = parsed.module.children[0]
            expr_stmt = simple_stmt.children[0]
            assert len(expr_stmt.children) == 3
            string_tok = expr_stmt.children[2]
            assert string_tok.type == 'string'
            assert string_tok.value == s
            assert string_tok.eval() == 'test'


def test_tokenizer_with_string_literal_backslash():
    import jedi
    c = jedi.Script("statement = u'foo\\\n'; statement").goto_definitions()
    assert c[0]._name._context.obj == 'foo'
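

# The tests in this file repeatedly unpack tokens into (type, value,
# start_pos, prefix) tuples. A minimal sketch making that shape explicit,
# assuming the tokenizer reports 1-based lines and 0-based columns
# (consistent with the end_pos assertions above):
def test_token_tuple_shape():
    source_io = StringIO(u('x = 1\n'))
    token_list = list(tokenize.generate_tokens(source_io.readline))
    typ, value, start_pos, prefix = token_list[0]
    assert typ == NAME
    assert value == 'x'
    assert start_pos == (1, 0)  # line is 1-based, column is 0-based
    assert prefix == ''  # nothing precedes the first token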


def test_ur_literals():
    """
    Decided to parse `u''` literals regardless of the Python version. This
    probably makes sense:

    - Early Python 3 (before 3.3) doesn't support them, but accepting them
      anyway doesn't hurt. While that is incorrect, it's only incorrect for a
      few "old" versions that will matter less and less in the future.
    - All the other Python versions work very well with them.
    """
    def check(literal, is_literal=True):
        io = StringIO(u(literal))
        tokens = tokenize.generate_tokens(io.readline)
        token_list = list(tokens)
        typ, result_literal, _, _ = token_list[0]
        if is_literal:
            assert typ == STRING
            assert result_literal == literal
        else:
            assert typ == NAME

    check('u""')
    check('ur""', is_literal=not is_py3)
    check('Ur""', is_literal=not is_py3)
    check('UR""', is_literal=not is_py3)
    check('bR""')
    # Starting with Python 3.3 this ordering is also possible, but we just
    # enable it for all versions. It doesn't hurt.
    check('Rb""')

    # Format strings were introduced with Python 3.6 (`py_version` is e.g. 36
    # for Python 3.6).
    check('fr""', is_literal=py_version >= 36)
    check('rF""', is_literal=py_version >= 36)
    check('f""', is_literal=py_version >= 36)
    check('F""', is_literal=py_version >= 36)
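

# The `bR""`/`Rb""` checks above imply that string prefix matching is
# case-insensitive, so the remaining casing `rb""` should tokenize as a
# string as well. A minimal sketch under that assumption:
def test_rb_literal_casing():
    io = StringIO(u('rb""'))
    typ, value, _, _ = list(tokenize.generate_tokens(io.readline))[0]
    assert typ == STRING
    assert value == 'rb""'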