1
0
forked from VimPlug/jedi
Files
jedi-fork/test/test_parser/test_tokenize.py

161 lines
5.5 KiB
Python

# -*- coding: utf-8 # This file contains Unicode characters.
from io import StringIO
from textwrap import dedent
import pytest
from jedi._compatibility import u, is_py3, py_version
from jedi.parser.token import NAME, OP, NEWLINE, STRING, INDENT
from jedi.parser import ParserWithRecovery, load_grammar, tokenize
from ..helpers import unittest
class TokenTest(unittest.TestCase):
def test_end_pos_one_line(self):
parsed = ParserWithRecovery(load_grammar(), dedent(u('''
def testit():
a = "huhu"
''')))
tok = parsed.module.subscopes[0].statements[0].children[2]
assert tok.end_pos == (3, 14)
def test_end_pos_multi_line(self):
parsed = ParserWithRecovery(load_grammar(), dedent(u('''
def testit():
a = """huhu
asdfasdf""" + "h"
''')))
tok = parsed.module.subscopes[0].statements[0].children[2].children[0]
assert tok.end_pos == (4, 11)
def test_simple_no_whitespace(self):
# Test a simple one line string, no preceding whitespace
simple_docstring = u('"""simple one line docstring"""')
simple_docstring_io = StringIO(simple_docstring)
tokens = tokenize.generate_tokens(simple_docstring_io.readline)
token_list = list(tokens)
_, value, _, prefix = token_list[0]
assert prefix == ''
assert value == '"""simple one line docstring"""'
def test_simple_with_whitespace(self):
# Test a simple one line string with preceding whitespace and newline
simple_docstring = u(' """simple one line docstring""" \r\n')
simple_docstring_io = StringIO(simple_docstring)
tokens = tokenize.generate_tokens(simple_docstring_io.readline)
token_list = list(tokens)
assert token_list[0][0] == INDENT
typ, value, start_pos, prefix = token_list[1]
assert prefix == ' '
assert value == '"""simple one line docstring"""'
assert typ == STRING
typ, value, start_pos, prefix = token_list[2]
assert prefix == ' '
assert typ == NEWLINE
def test_function_whitespace(self):
# Test function definition whitespace identification
fundef = dedent(u('''
def test_whitespace(*args, **kwargs):
x = 1
if x > 0:
print(True)
'''))
fundef_io = StringIO(fundef)
tokens = tokenize.generate_tokens(fundef_io.readline)
token_list = list(tokens)
for _, value, _, prefix in token_list:
if value == 'test_whitespace':
assert prefix == ' '
if value == '(':
assert prefix == ''
if value == '*':
assert prefix == ''
if value == '**':
assert prefix == ' '
if value == 'print':
assert prefix == ' '
if value == 'if':
assert prefix == ' '
def test_identifier_contains_unicode(self):
fundef = dedent(u('''
def 我あφ():
pass
'''))
fundef_io = StringIO(fundef)
tokens = tokenize.generate_tokens(fundef_io.readline)
token_list = list(tokens)
unicode_token = token_list[1]
if is_py3:
assert unicode_token[0] == NAME
else:
# Unicode tokens in Python 2 seem to be identified as operators.
# They will be ignored in the parser, that's ok.
assert unicode_token[0] == OP
def test_quoted_strings(self):
string_tokens = [
'u"test"',
'u"""test"""',
'U"""test"""',
"u'''test'''",
"U'''test'''",
]
for s in string_tokens:
parsed = ParserWithRecovery(load_grammar(), u('''a = %s\n''' % s))
simple_stmt = parsed.module.children[0]
expr_stmt = simple_stmt.children[0]
assert len(expr_stmt.children) == 3
string_tok = expr_stmt.children[2]
assert string_tok.type == 'string'
assert string_tok.value == s
assert string_tok.eval() == 'test'
def test_tokenizer_with_string_literal_backslash():
import jedi
c = jedi.Script("statement = u'foo\\\n'; statement").goto_definitions()
assert c[0]._name._context.obj == 'foo'
def test_ur_literals():
"""
Decided to parse `u''` literals regardless of Python version. This makes
probably sense:
- Python 3+ doesn't support it, but it doesn't hurt
not be. While this is incorrect, it's just incorrect for one "old" and in
the future not very important version.
- All the other Python versions work very well with it.
"""
def check(literal, is_literal=True):
io = StringIO(u(literal))
tokens = tokenize.generate_tokens(io.readline)
token_list = list(tokens)
typ, result_literal, _, _ = token_list[0]
if is_literal:
assert typ == STRING
assert result_literal == literal
else:
assert typ == NAME
check('u""')
check('ur""', is_literal=not is_py3)
check('Ur""', is_literal=not is_py3)
check('UR""', is_literal=not is_py3)
check('bR""')
# Starting with Python 3.3 this ordering is also possible, but we just
# enable it for all versions. It doesn't hurt.
check('Rb""')
# Starting with Python 3.6 format strings where introduced.
check('fr""', is_literal=py_version >= 36)
check('rF""', is_literal=py_version >= 36)
check('f""', is_literal=py_version >= 36)
check('F""', is_literal=py_version >= 36)