
Replace the tokenizer's output with a tuple (switching back from a Token class).

Dave Halter
2014-12-16 00:09:31 +01:00
parent 680fdd574b
commit eaace104dd
5 changed files with 50 additions and 136 deletions
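
In short: the tokenizer now yields plain (type, value, start_pos, prefix) tuples instead of Token instances, so consumers unpack the fields directly rather than reading attributes. A minimal usage sketch of the post-commit API (not part of this commit; the sample source string is made up):

    from io import StringIO
    from token import tok_name
    from jedi.parser import tokenize

    source = u"def f():\n    return 1\n"
    # Every token is now a plain 4-tuple: (type, value, start_pos, prefix).
    for typ, value, start_pos, prefix in tokenize.generate_tokens(StringIO(source).readline):
        print((tok_name.get(typ, typ), value, start_pos, prefix))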

@@ -291,13 +291,10 @@ class Parser(object):
             self._scope = self.module
         """
-        new_scope = False
-        for token in tokenizer:
-            typ = token.type
-            value = token.value
+        for typ, value, start_pos, prefix in tokenizer:
             if typ == tokenize.OP:
                 typ = grammar.opmap[value]
-            yield typ, value, token.prefix, token.start_pos
+            yield typ, value, prefix, start_pos

     def __repr__(self):
         return "<%s: %s>" % (type(self).__name__, self.module)

@@ -12,8 +12,7 @@ from jedi.parser import Parser
 from jedi.parser import tree as pr
 from jedi.parser import tokenize
 from jedi import cache
-from jedi.parser.tokenize import (source_tokens, Token, FLOWS, NEWLINE,
-                                  COMMENT, ENDMARKER)
+from jedi.parser.tokenize import source_tokens, FLOWS, NEWLINE, COMMENT, ENDMARKER


 class Module(pr.Module, pr.Simple):

@@ -387,7 +386,7 @@ class FastTokenizer(object):
         self.closed = False

         # fast parser options
-        self.current = self.previous = Token(None, '', (0, 0))
+        self.current = self.previous = None, '', (0, 0)
         self.in_flow = False
         self.new_indent = False
         self.parser_indent = self.old_parser_indent = 0

@@ -17,8 +17,6 @@ from io import StringIO
 from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP,
                    ERRORTOKEN, NEWLINE, INDENT, DEDENT)

-from jedi._compatibility import u

 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -34,82 +32,6 @@ COMMENT = N_TOKENS
 tok_name[COMMENT] = 'COMMENT'


-class Token(object):
-    """
-    The token object is an efficient representation of the structure
-    (type, token, (start_pos_line, start_pos_col, prefix)). It has indexer
-    methods that maintain compatibility to existing code that expects the above
-    structure.
-
-    >>> repr(Token(1, "test", (1, 1, '')))
-    "<Token: ('NAME', 'test', (1, 1, ''))>"
-    >>> Token(1, 'bar', (3, 4, '')).__getstate__()
-    (1, 'bar', 3, 4, '')
-    >>> a = Token(0, 'baz', (0, 0, ''))
-    >>> a.__setstate__((1, 'foo', 3, 4, ''))
-    >>> a
-    <Token: ('NAME', 'foo', (3, 4, ''))>
-    >>> a.start_pos
-    (3, 4)
-    >>> a.value
-    'foo'
-    >>> a._start_pos_col
-    4
-    >>> Token(1, u("😷"), (1 ,1, '')).value + "p" == u("😷p")
-    True
-    """
-    __slots__ = ("type", "value", "_start_pos_line", "_start_pos_col",
-                 "prefix")
-
-    def __init__(self, type, value, start_pos, prefix=''):
-        self.type = type
-        self.value = value
-        self._start_pos_line = start_pos[0]
-        self._start_pos_col = start_pos[1]
-        self.prefix = prefix
-
-    def __repr__(self):
-        typ = tok_name[self.type]
-        content = typ, self.value,\
-            (self._start_pos_line, self._start_pos_col, self.prefix)
-        return "<%s: %s>" % (type(self).__name__, content)
-
-    @property
-    def start_pos(self):
-        return self._start_pos_line, self._start_pos_col
-
-    @property
-    def end_pos(self):
-        """Returns end position respecting multiline tokens."""
-        end_pos_line = self._start_pos_line
-        lines = self.value.split('\n')
-        if self.value.endswith('\n'):
-            lines = lines[:-1]
-            lines[-1] += '\n'
-        end_pos_line += len(lines) - 1
-        end_pos_col = self._start_pos_col
-        # Check for multiline token
-        if self._start_pos_line == end_pos_line:
-            end_pos_col += len(lines[-1])
-        else:
-            end_pos_col = len(lines[-1])
-        return (end_pos_line, end_pos_col)
-
-    # Make cache footprint smaller for faster unpickling
-    def __getstate__(self):
-        return (self.type, self.value,
-                self._start_pos_line, self._start_pos_col,
-                self.prefix)
-
-    # TODO DELETE this is not needed anymore, I guess. It should not get pickled.
-    def __setstate__(self, state):
-        self.type = state[0]
-        self.value = state[1]
-        self._start_pos_line = state[2]
-        self._start_pos_col = state[3]
-        self.prefix = state[4]


 def group(*choices):
     return '(' + '|'.join(choices) + ')'
@@ -239,7 +161,7 @@ def generate_tokens(readline, line_offset=0):
         line = readline()  # readline returns empty when finished. See StringIO
         if not line:
             if contstr:
-                yield Token(ERRORTOKEN, contstr, contstr_start, prefix)
+                yield ERRORTOKEN, contstr, contstr_start, prefix
             break

         lnum += 1
@@ -249,8 +171,7 @@ def generate_tokens(readline, line_offset=0):
             endmatch = endprog.match(line)
             if endmatch:
                 pos = endmatch.end(0)
-                yield Token(STRING, contstr + line[:pos],
-                            contstr_start, prefix)
+                yield STRING, contstr + line[:pos], contstr_start, prefix
                 contstr = ''
                 contline = None
             else:
@@ -266,7 +187,7 @@ def generate_tokens(readline, line_offset=0):
                 # If a literal starts but doesn't end the whole rest of the
                 # line is an error token.
                 txt = line[pos:]
-                yield Token(ERRORTOKEN, txt, (lnum, pos))
+                yield ERRORTOKEN, txt, (lnum, pos), prefix
                 pos += 1
                 continue
@@ -279,18 +200,18 @@ def generate_tokens(readline, line_offset=0):
             new_line = False
             if paren_level == 0:
                 if start > indents[-1]:
-                    yield Token(INDENT, '', spos, '')
+                    yield INDENT, '', spos, ''
                     indents.append(start)
                 while start < indents[-1]:
-                    yield Token(DEDENT, '', spos, '')
+                    yield DEDENT, '', spos, ''
                     indents.pop()

             if (initial in numchars or  # ordinary number
                     (initial == '.' and token != '.' and token != '...')):
-                yield Token(NUMBER, token, spos, prefix)
+                yield NUMBER, token, spos, prefix
             elif initial in '\r\n':
                 if not new_line and paren_level == 0:
-                    yield Token(NEWLINE, token, spos, prefix)
+                    yield NEWLINE, token, spos, prefix
                 new_line = True
             elif initial == '#':
                 assert not token.endswith("\n")
@@ -301,7 +222,7 @@ def generate_tokens(readline, line_offset=0):
                 if endmatch:  # all on one line
                     pos = endmatch.end(0)
                     token = line[start:pos]
-                    yield Token(STRING, token, spos, prefix)
+                    yield STRING, token, spos, prefix
                 else:
                     contstr_start = (lnum, start)  # multiple lines
                     contstr = line[start:]
@@ -318,18 +239,18 @@ def generate_tokens(readline, line_offset=0):
                     contline = line
                     break
                 else:  # ordinary string
-                    yield Token(STRING, token, spos, prefix)
+                    yield STRING, token, spos, prefix
             elif initial in namechars:  # ordinary name
                 if token in ALWAYS_BREAK_TOKEN:
                     paren_level = 0
                     while True:
                         indent = indents.pop()
                         if indent > start:
-                            yield Token(DEDENT, '', (lnum, 0), '')
+                            yield DEDENT, '', (lnum, 0), ''
                         else:
                             indents.append(indent)
                             break
-                yield Token(NAME, token, spos, prefix)
+                yield NAME, token, spos, prefix
             elif initial == '\\' and line[start:] == '\\\n':  # continued stmt
                 continue
             else:
@@ -337,8 +258,8 @@ def generate_tokens(readline, line_offset=0):
                     paren_level += 1
                 elif token in ')]}':
                     paren_level -= 1
-                yield Token(OP, token, spos, prefix)
+                yield OP, token, spos, prefix

     for indent in indents[1:]:
-        yield Token(DEDENT, '', (lnum, 0), '')
-    yield Token(ENDMARKER, '', (lnum, 0), prefix)
+        yield DEDENT, '', (lnum, 0), ''
+    yield ENDMARKER, '', (lnum, 0), prefix
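
With Token.end_pos removed, callers that still need a token's end position have to derive it from the tuple themselves (the UserContext change below only approximates it and carries a TODO about multiline tokens). A standalone sketch of that calculation, mirroring the logic of the deleted property; the helper name is made up:

    def token_end_pos(value, start_pos):
        # Mirrors the deleted Token.end_pos: count the lines spanned by the
        # token's value and compute the column on the last one.
        lines = value.split('\n')
        if value.endswith('\n'):
            lines = lines[:-1]
            lines[-1] += '\n'
        end_line = start_pos[0] + len(lines) - 1
        if start_pos[0] == end_line:            # single-line token
            return end_line, start_pos[1] + len(lines[-1])
        return end_line, len(lines[-1])         # multiline token

    # A triple-quoted string spanning two lines, starting at line 1, column 4:
    assert token_end_pos('"""a\nb"""', (1, 4)) == (2, 4)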

@@ -74,13 +74,12 @@ class UserContext(object):
         force_point = False
         last_type = None
         is_first = True
-        for tok in gen:
-            tok_type = tok.type
-            tok_str = tok.value
-            end = tok.end_pos
+        for tok_type, tok_str, tok_start_pos, prefix in gen:
+            # TODO end is not correct, doesn't take new lines in consideration.
+            end = tok_start_pos[0], tok_start_pos[-1] + len(tok_str)
             self._column_temp = self._line_length - end[1]
             if is_first:
-                if tok.start_pos != (1, 0):  # whitespace is not a path
+                if tok_start_pos != (1, 0):  # whitespace is not a path
                     return u(''), start_cursor
                 is_first = False
@@ -118,7 +117,7 @@ class UserContext(object):
             else:
                 if tok_str == '-':
                     next_tok = next(gen)
-                    if next_tok.value == 'e':
+                    if next_tok[1] == 'e':
                         gen.push_back(next_tok)
                     else:
                         break
@@ -166,16 +165,15 @@ class UserContext(object):
         next_must_be_name = False
         next_is_key = False
         key_name = None
-        for token in self._get_backwards_tokenizer(self.position):
-            tok_str = token.value
+        for tok_type, tok_str, start_pos, prefix in self._get_backwards_tokenizer(self.position):
             if next_must_be_name:
-                if token.type == tokenize.NAME:
+                if tok_type == tokenize.NAME:
                     call, _ = self._calc_path_until_cursor(start_pos=pos)
                     return call, index, key_name
                 index = 0
                 next_must_be_name = False
             elif next_is_key:
-                if token.type == tokenize.NAME:
+                if tok_type == tokenize.NAME:
                     key_name = tok_str[::-1]
                 next_is_key = False
@@ -184,7 +182,7 @@ class UserContext(object):
                 if level == 1:
                     next_must_be_name = True
                     level = 0
-                end = token.end_pos
+                end = start_pos[0], start_pos[1] + 1
                 self._column_temp = self._line_length - end[1]
                 pos = self._line_temp + 1, self._column_temp
             elif tok_str == ')':

@@ -31,9 +31,9 @@ asdfasdf""" + "h"
         simple_docstring_io = StringIO(simple_docstring)
         tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline)
         token_list = list(tokens)
-        string_token = token_list[0]
-        self.assertEqual(string_token.prefix, '')
-        self.assertEqual(string_token.value, '"""simple one line docstring"""')
+        _, value, _, prefix = token_list[0]
+        self.assertEqual(prefix, '')
+        self.assertEqual(value, '"""simple one line docstring"""')

     def test_simple_with_whitespace(self):
         # Test a simple one line string with preceding whitespace and newline
@@ -41,13 +41,13 @@ asdfasdf""" + "h"
         simple_docstring_io = StringIO(simple_docstring)
         tokens = parser.tokenize.generate_tokens(simple_docstring_io.readline)
         token_list = list(tokens)
-        string_token = token_list[0]
-        self.assertEqual(string_token.prefix, ' ')
-        self.assertEqual(string_token.value, '"""simple one line docstring"""')
-        self.assertEqual(string_token.type, STRING)
-        newline_token = token_list[1]
-        self.assertEqual(newline_token.prefix, ' ')
-        self.assertEqual(newline_token.type, NEWLINE)
+        typ, value, start_pos, prefix = token_list[0]
+        self.assertEqual(prefix, ' ')
+        self.assertEqual(value, '"""simple one line docstring"""')
+        self.assertEqual(typ, STRING)
+        typ, value, start_pos, prefix = token_list[1]
+        self.assertEqual(prefix, ' ')
+        self.assertEqual(typ, NEWLINE)

     def test_function_whitespace(self):
         # Test function definition whitespace identification
@@ -59,20 +59,19 @@ asdfasdf""" + "h"
         fundef_io = StringIO(fundef)
         tokens = parser.tokenize.generate_tokens(fundef_io.readline)
         token_list = list(tokens)
-        print(token_list)
-        for t in token_list:
-            if t.value == 'test_whitespace':
-                self.assertEqual(t.prefix, ' ')
-            if t.value == '(':
-                self.assertEqual(t.prefix, '')
-            if t.value == '*':
-                self.assertEqual(t.prefix, '')
-            if t.value == '**':
-                self.assertEqual(t.prefix, ' ')
-            if t.value == 'print':
-                self.assertEqual(t.prefix, ' ')
-            if t.value == 'if':
-                self.assertEqual(t.prefix, ' ')
+        for _, value, _, prefix in token_list:
+            if value == 'test_whitespace':
+                self.assertEqual(prefix, ' ')
+            if value == '(':
+                self.assertEqual(prefix, '')
+            if value == '*':
+                self.assertEqual(prefix, '')
+            if value == '**':
+                self.assertEqual(prefix, ' ')
+            if value == 'print':
+                self.assertEqual(prefix, ' ')
+            if value == 'if':
+                self.assertEqual(prefix, ' ')


 def test_tokenizer_with_string_literal_backslash():