forked from VimPlug/jedi
Replace the tokenizer's output with a tuple (switching back from a Token class).
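
In practice the interface change looks like this; a minimal sketch of the new contract (assuming the post-commit jedi.parser.tokenize module; previously the loop iterated Token objects and read .type, .value, .start_pos and .prefix):

    from io import StringIO
    from jedi.parser import tokenize

    # Each token is now a plain tuple in the order (type, value, start_pos, prefix).
    source_io = StringIO('x = 1\n')
    for typ, value, start_pos, prefix in tokenize.generate_tokens(source_io.readline):
        print(typ, repr(value), start_pos, repr(prefix))
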
@@ -291,13 +291,10 @@ class Parser(object):
         self._scope = self.module
         """

-        new_scope = False
-        for token in tokenizer:
-            typ = token.type
-            value = token.value
+        for typ, value, start_pos, prefix in tokenizer:
             if typ == tokenize.OP:
                 typ = grammar.opmap[value]
-            yield typ, value, token.prefix, token.start_pos
+            yield typ, value, prefix, start_pos

     def __repr__(self):
         return "<%s: %s>" % (type(self).__name__, self.module)
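
Note that the generator above also re-orders the fields: the tokenizer now produces (type, value, start_pos, prefix), while the parser yields (type, value, prefix, start_pos) after mapping operator tokens through grammar.opmap. Callers that still want attribute access can wrap the tuples themselves; a hypothetical shim, not part of this commit (which deliberately drops the class):

    from collections import namedtuple

    # Illustrative only: restores token.type / token.value / token.start_pos /
    # token.prefix for callers not yet converted to tuple unpacking.
    TokenTuple = namedtuple('TokenTuple', ['type', 'value', 'start_pos', 'prefix'])

    def as_token_objects(tokens):
        for tok in tokens:
            yield TokenTuple(*tok)
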
@@ -12,8 +12,7 @@ from jedi.parser import Parser
 from jedi.parser import tree as pr
 from jedi.parser import tokenize
 from jedi import cache
-from jedi.parser.tokenize import (source_tokens, Token, FLOWS, NEWLINE,
-                                  COMMENT, ENDMARKER)
+from jedi.parser.tokenize import source_tokens, FLOWS, NEWLINE, COMMENT, ENDMARKER


 class Module(pr.Module, pr.Simple):
@@ -387,7 +386,7 @@ class FastTokenizer(object):
         self.closed = False

         # fast parser options
-        self.current = self.previous = Token(None, '', (0, 0))
+        self.current = self.previous = None, '', (0, 0)
         self.in_flow = False
         self.new_indent = False
         self.parser_indent = self.old_parser_indent = 0
@@ -17,8 +17,6 @@ from io import StringIO
 from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP,
                    ERRORTOKEN, NEWLINE, INDENT, DEDENT)

-from jedi._compatibility import u
-
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")


@@ -34,82 +32,6 @@ COMMENT = N_TOKENS
 tok_name[COMMENT] = 'COMMENT'


-class Token(object):
-    """
-    The token object is an efficient representation of the structure
-    (type, token, (start_pos_line, start_pos_col, prefix)). It has indexer
-    methods that maintain compatibility to existing code that expects the above
-    structure.
-
-    >>> repr(Token(1, "test", (1, 1, '')))
-    "<Token: ('NAME', 'test', (1, 1, ''))>"
-    >>> Token(1, 'bar', (3, 4, '')).__getstate__()
-    (1, 'bar', 3, 4, '')
-    >>> a = Token(0, 'baz', (0, 0, ''))
-    >>> a.__setstate__((1, 'foo', 3, 4, ''))
-    >>> a
-    <Token: ('NAME', 'foo', (3, 4, ''))>
-    >>> a.start_pos
-    (3, 4)
-    >>> a.value
-    'foo'
-    >>> a._start_pos_col
-    4
-    >>> Token(1, u("😷"), (1 ,1, '')).value + "p" == u("😷p")
-    True
-    """
-    __slots__ = ("type", "value", "_start_pos_line", "_start_pos_col",
-                 "prefix")
-
-    def __init__(self, type, value, start_pos, prefix=''):
-        self.type = type
-        self.value = value
-        self._start_pos_line = start_pos[0]
-        self._start_pos_col = start_pos[1]
-        self.prefix = prefix
-
-    def __repr__(self):
-        typ = tok_name[self.type]
-        content = typ, self.value,\
-            (self._start_pos_line, self._start_pos_col, self.prefix)
-        return "<%s: %s>" % (type(self).__name__, content)
-
-    @property
-    def start_pos(self):
-        return self._start_pos_line, self._start_pos_col
-
-    @property
-    def end_pos(self):
-        """Returns end position respecting multiline tokens."""
-        end_pos_line = self._start_pos_line
-        lines = self.value.split('\n')
-        if self.value.endswith('\n'):
-            lines = lines[:-1]
-            lines[-1] += '\n'
-        end_pos_line += len(lines) - 1
-        end_pos_col = self._start_pos_col
-        # Check for multiline token
-        if self._start_pos_line == end_pos_line:
-            end_pos_col += len(lines[-1])
-        else:
-            end_pos_col = len(lines[-1])
-        return (end_pos_line, end_pos_col)
-
-    # Make cache footprint smaller for faster unpickling
-    def __getstate__(self):
-        return (self.type, self.value,
-                self._start_pos_line, self._start_pos_col,
-                self.prefix)
-
-    # TODO DELETE this is not needed anymore, I guess. It should not get pickled.
-    def __setstate__(self, state):
-        self.type = state[0]
-        self.value = state[1]
-        self._start_pos_line = state[2]
-        self._start_pos_col = state[3]
-        self.prefix = state[4]
-
-
 def group(*choices):
     return '(' + '|'.join(choices) + ')'

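
With the class gone, end_pos is no longer available on a token; callers that need it have to recompute it from the tuple. A standalone sketch of the newline-aware logic the removed Token.end_pos property implemented (hypothetical helper, not added by this commit):

    def end_pos(start_pos, value):
        """Return the position just past `value`, like the removed Token.end_pos."""
        line, column = start_pos
        lines = value.split('\n')
        if value.endswith('\n'):
            lines = lines[:-1]
            lines[-1] += '\n'
        end_line = line + len(lines) - 1
        if line == end_line:                 # single-line token
            return end_line, column + len(lines[-1])
        return end_line, len(lines[-1])      # multiline token: column restarts
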
@@ -239,7 +161,7 @@ def generate_tokens(readline, line_offset=0):
         line = readline()  # readline returns empty when finished. See StringIO
         if not line:
             if contstr:
-                yield Token(ERRORTOKEN, contstr, contstr_start, prefix)
+                yield ERRORTOKEN, contstr, contstr_start, prefix
             break

         lnum += 1
@@ -249,8 +171,7 @@ def generate_tokens(readline, line_offset=0):
             endmatch = endprog.match(line)
             if endmatch:
                 pos = endmatch.end(0)
-                yield Token(STRING, contstr + line[:pos],
-                            contstr_start, prefix)
+                yield STRING, contstr + line[:pos], contstr_start, prefix
                 contstr = ''
                 contline = None
             else:
@@ -266,7 +187,7 @@ def generate_tokens(readline, line_offset=0):
                 # If a literal starts but doesn't end the whole rest of the
                 # line is an error token.
                 txt = line[pos:]
-                yield Token(ERRORTOKEN, txt, (lnum, pos))
+                yield ERRORTOKEN, txt, (lnum, pos), prefix
                 pos += 1
                 continue

@@ -279,18 +200,18 @@ def generate_tokens(readline, line_offset=0):
                 new_line = False
                 if paren_level == 0:
                     if start > indents[-1]:
-                        yield Token(INDENT, '', spos, '')
+                        yield INDENT, '', spos, ''
                         indents.append(start)
                     while start < indents[-1]:
-                        yield Token(DEDENT, '', spos, '')
+                        yield DEDENT, '', spos, ''
                         indents.pop()

             if (initial in numchars or  # ordinary number
                     (initial == '.' and token != '.' and token != '...')):
-                yield Token(NUMBER, token, spos, prefix)
+                yield NUMBER, token, spos, prefix
             elif initial in '\r\n':
                 if not new_line and paren_level == 0:
-                    yield Token(NEWLINE, token, spos, prefix)
+                    yield NEWLINE, token, spos, prefix
                 new_line = True
             elif initial == '#':
                 assert not token.endswith("\n")
@@ -301,7 +222,7 @@ def generate_tokens(readline, line_offset=0):
                 if endmatch:  # all on one line
                     pos = endmatch.end(0)
                     token = line[start:pos]
-                    yield Token(STRING, token, spos, prefix)
+                    yield STRING, token, spos, prefix
                 else:
                     contstr_start = (lnum, start)  # multiple lines
                     contstr = line[start:]
@@ -318,18 +239,18 @@ def generate_tokens(readline, line_offset=0):
                     contline = line
                     break
                 else:  # ordinary string
-                    yield Token(STRING, token, spos, prefix)
+                    yield STRING, token, spos, prefix
             elif initial in namechars:  # ordinary name
                 if token in ALWAYS_BREAK_TOKEN:
                     paren_level = 0
                     while True:
                         indent = indents.pop()
                         if indent > start:
-                            yield Token(DEDENT, '', (lnum, 0), '')
+                            yield DEDENT, '', (lnum, 0), ''
                         else:
                             indents.append(indent)
                             break
-                yield Token(NAME, token, spos, prefix)
+                yield NAME, token, spos, prefix
             elif initial == '\\' and line[start:] == '\\\n':  # continued stmt
                 continue
             else:
@@ -337,8 +258,8 @@ def generate_tokens(readline, line_offset=0):
                     paren_level += 1
                 elif token in ')]}':
                     paren_level -= 1
-                yield Token(OP, token, spos, prefix)
+                yield OP, token, spos, prefix

     for indent in indents[1:]:
-        yield Token(DEDENT, '', (lnum, 0), '')
-    yield Token(ENDMARKER, '', (lnum, 0), prefix)
+        yield DEDENT, '', (lnum, 0), ''
+    yield ENDMARKER, '', (lnum, 0), prefix
@@ -74,13 +74,12 @@ class UserContext(object):
         force_point = False
         last_type = None
         is_first = True
-        for tok in gen:
-            tok_type = tok.type
-            tok_str = tok.value
-            end = tok.end_pos
+        for tok_type, tok_str, tok_start_pos, prefix in gen:
+            # TODO end is not correct, doesn't take new lines in consideration.
+            end = tok_start_pos[0], tok_start_pos[-1] + len(tok_str)
             self._column_temp = self._line_length - end[1]
             if is_first:
-                if tok.start_pos != (1, 0):  # whitespace is not a path
+                if tok_start_pos != (1, 0):  # whitespace is not a path
                     return u(''), start_cursor
                 is_first = False

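
The TODO in the new code above points at exactly what the removed Token.end_pos property used to handle: adding len(value) to the start column is only right for single-line tokens. A tiny illustration with hypothetical values:

    value = '"""x\ny"""'                                     # a two-line STRING token
    start_pos = (1, 0)
    naive_end = (start_pos[0], start_pos[1] + len(value))    # (1, 9), still on line 1
    # The newline-aware end position would be (2, 4): the token ends on line 2, column 4.
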
@@ -118,7 +117,7 @@ class UserContext(object):
                 else:
                     if tok_str == '-':
                         next_tok = next(gen)
-                        if next_tok.value == 'e':
+                        if next_tok[1] == 'e':
                             gen.push_back(next_tok)
                         else:
                             break
@@ -166,16 +165,15 @@ class UserContext(object):
         next_must_be_name = False
         next_is_key = False
         key_name = None
-        for token in self._get_backwards_tokenizer(self.position):
-            tok_str = token.value
+        for tok_type, tok_str, start_pos, prefix in self._get_backwards_tokenizer(self.position):
             if next_must_be_name:
-                if token.type == tokenize.NAME:
+                if tok_type == tokenize.NAME:
                     call, _ = self._calc_path_until_cursor(start_pos=pos)
                     return call, index, key_name
                 index = 0
                 next_must_be_name = False
             elif next_is_key:
-                if token.type == tokenize.NAME:
+                if tok_type == tokenize.NAME:
                     key_name = tok_str[::-1]
                 next_is_key = False

@@ -184,7 +182,7 @@ class UserContext(object):
                 if level == 1:
                     next_must_be_name = True
                     level = 0
-                    end = token.end_pos
+                    end = start_pos[0], start_pos[1] + 1
                     self._column_temp = self._line_length - end[1]
                     pos = self._line_temp + 1, self._column_temp
             elif tok_str == ')':
@@ -31,9 +31,9 @@ asdfasdf""" + "h"
         simple_docstring_io = StringIO(simple_docstring)
         tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline)
         token_list = list(tokens)
-        string_token = token_list[0]
-        self.assertEqual(string_token.prefix, '')
-        self.assertEqual(string_token.value, '"""simple one line docstring"""')
+        _, value, _, prefix = token_list[0]
+        self.assertEqual(prefix, '')
+        self.assertEqual(value, '"""simple one line docstring"""')

     def test_simple_with_whitespace(self):
         # Test a simple one line string with preceding whitespace and newline
@@ -41,13 +41,13 @@ asdfasdf""" + "h"
         simple_docstring_io = StringIO(simple_docstring)
         tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline)
         token_list = list(tokens)
-        string_token = token_list[0]
-        self.assertEqual(string_token.prefix, ' ')
-        self.assertEqual(string_token.value, '"""simple one line docstring"""')
-        self.assertEqual(string_token.type, STRING)
-        newline_token = token_list[1]
-        self.assertEqual(newline_token.prefix, ' ')
-        self.assertEqual(newline_token.type, NEWLINE)
+        typ, value, start_pos, prefix = token_list[0]
+        self.assertEqual(prefix, ' ')
+        self.assertEqual(value, '"""simple one line docstring"""')
+        self.assertEqual(typ, STRING)
+        typ, value, start_pos, prefix = token_list[1]
+        self.assertEqual(prefix, ' ')
+        self.assertEqual(typ, NEWLINE)

     def test_function_whitespace(self):
         # Test function definition whitespace identification
@@ -59,20 +59,19 @@ asdfasdf""" + "h"
         fundef_io = StringIO(fundef)
         tokens = parser.tokenize.generate_tokens(fundef_io.readline)
         token_list = list(tokens)
-        print(token_list)
-        for t in token_list:
-            if t.value == 'test_whitespace':
-                self.assertEqual(t.prefix, ' ')
-            if t.value == '(':
-                self.assertEqual(t.prefix, '')
-            if t.value == '*':
-                self.assertEqual(t.prefix, '')
-            if t.value == '**':
-                self.assertEqual(t.prefix, ' ')
-            if t.value == 'print':
-                self.assertEqual(t.prefix, ' ')
-            if t.value == 'if':
-                self.assertEqual(t.prefix, ' ')
+        for _, value, _, prefix in token_list:
+            if value == 'test_whitespace':
+                self.assertEqual(prefix, ' ')
+            if value == '(':
+                self.assertEqual(prefix, '')
+            if value == '*':
+                self.assertEqual(prefix, '')
+            if value == '**':
+                self.assertEqual(prefix, ' ')
+            if value == 'print':
+                self.assertEqual(prefix, ' ')
+            if value == 'if':
+                self.assertEqual(prefix, ' ')


 def test_tokenizer_with_string_literal_backslash():