1
0
forked from VimPlug/jedi

Merge pull request #353 from ganwell/tokens

Token objects
This commit is contained in:
David Halter
2013-12-08 04:40:32 -08:00
8 changed files with 244 additions and 42 deletions

View File

@@ -220,7 +220,7 @@ def save_module(path, name, parser, pickling=True):
class _ModulePickling(object): class _ModulePickling(object):
version = 4 version = 5
""" """
Version number (integer) for file system cache. Version number (integer) for file system cache.

View File

@@ -146,8 +146,13 @@ class NoErrorTokenizer(object):
if self.is_fast_parser \ if self.is_fast_parser \
and self.previous[0] in (tokenize.INDENT, tokenize.NL, None, and self.previous[0] in (tokenize.INDENT, tokenize.NL, None,
tokenize.NEWLINE, tokenize.DEDENT) \ tokenize.NEWLINE, tokenize.DEDENT) \
and c[0] not in (tokenize.COMMENT, tokenize.INDENT, and c[0] not in (
tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT): tokenize.COMMENT,
tokenize.INDENT,
tokenize.NL,
tokenize.NEWLINE,
tokenize.DEDENT
):
# print c, tokenize.tok_name[c[0]] # print c, tokenize.tok_name[c[0]]
tok = c[1] tok = c[1]

View File

@@ -6,6 +6,7 @@ import itertools
import tokenize import tokenize
from jedi.parser import representation as pr from jedi.parser import representation as pr
from jedi.parser import token
class ObjectImporter(object): class ObjectImporter(object):
@@ -156,11 +157,13 @@ class ObjectImporter(object):
names=[(rhs, (0, 0))], names=[(rhs, (0, 0))],
start_pos=(0, 0), start_pos=(0, 0),
end_pos=(None, None)) end_pos=(None, None))
token_list = [lhsname, (tokenize.OP, '=', (0, 0)), rhsname] token_list = [lhsname, token.Token.from_tuple(
(tokenize.OP, '=', (0, 0))
), rhsname]
if call: if call:
token_list.extend([ token_list.extend([
(tokenize.OP, '(', (0, 0)), token.Token.from_tuple((tokenize.OP, '(', (0, 0))),
(tokenize.OP, ')', (0, 0)), token.Token.from_tuple((tokenize.OP, ')', (0, 0))),
]) ])
return pr.Statement( return pr.Statement(
module=submodule, module=submodule,

View File

@@ -24,6 +24,7 @@ from jedi._compatibility import next, StringIO
from jedi import debug from jedi import debug
from jedi import common from jedi import common
from jedi.parser import representation as pr from jedi.parser import representation as pr
from jedi.parser import token as token_pr
class Parser(object): class Parser(object):
@@ -271,8 +272,11 @@ class Parser(object):
first_pos = self.start_pos first_pos = self.start_pos
token_type, cname = self.next() token_type, cname = self.next()
if token_type != tokenize.NAME: if token_type != tokenize.NAME:
debug.warning("class: syntax err, token is not a name@%s (%s: %s)" debug.warning(
% (self.start_pos[0], tokenize.tok_name[token_type], cname)) "class: syntax err, token is not a name@%s (%s: %s)" % (
self.start_pos[0], tokenize.tok_name[token_type], cname
)
)
return None return None
cname = pr.Name(self.module, [(cname, self.start_pos)], self.start_pos, cname = pr.Name(self.module, [(cname, self.start_pos)], self.start_pos,
@@ -345,11 +349,17 @@ class Parser(object):
or tok in breaks and level <= 0): or tok in breaks and level <= 0):
try: try:
# print 'parse_stmt', tok, tokenize.tok_name[token_type] # print 'parse_stmt', tok, tokenize.tok_name[token_type]
tok_list.append(self._current + (self.start_pos,)) tok_list.append(
token_pr.Token.from_tuple(
self._current + (self.start_pos,)
)
)
if tok == 'as': if tok == 'as':
token_type, tok = self.next() token_type, tok = self.next()
if token_type == tokenize.NAME: if token_type == tokenize.NAME:
n, token_type, tok = self._parse_dot_name(self._current) n, token_type, tok = self._parse_dot_name(
self._current
)
if n: if n:
set_vars.append(n) set_vars.append(n)
as_names.append(n) as_names.append(n)
@@ -382,21 +392,20 @@ class Parser(object):
first_tok = tok_list[0] first_tok = tok_list[0]
# docstrings # docstrings
if len(tok_list) == 1 and not isinstance(first_tok, pr.Name) \ if len(tok_list) == 1 and not isinstance(first_tok, pr.Name) \
and first_tok[0] == tokenize.STRING: and first_tok.token_type == tokenize.STRING:
# Normal docstring check # Normal docstring check
if self.freshscope and not self.no_docstr: if self.freshscope and not self.no_docstr:
self._scope.add_docstr(first_tok[1]) self._scope.add_docstr(first_tok.token)
return None, tok return None, tok
# Attribute docstring (PEP 224) support (sphinx uses it, e.g.) # Attribute docstring (PEP 224) support (sphinx uses it, e.g.)
# If string literal is being parsed... # If string literal is being parsed...
elif first_tok[0] == tokenize.STRING: elif first_tok.token_type == tokenize.STRING:
with common.ignored(IndexError, AttributeError): with common.ignored(IndexError, AttributeError):
# ...then set it as a docstring # ...then set it as a docstring
self._scope.statements[-1].add_docstr(first_tok[1]) self._scope.statements[-1].add_docstr(first_tok.token)
return None, tok return None, tok
stmt = stmt_class(self.module, tok_list, first_pos, self.end_pos, stmt = stmt_class(self.module, tok_list, first_pos, self.end_pos,
as_names=as_names, as_names=as_names,
names_are_set_vars=names_are_set_vars) names_are_set_vars=names_are_set_vars)
@@ -435,9 +444,11 @@ class Parser(object):
s = s.parent s = s.parent
raise raise
if self.user_position and (self.start_pos[0] == self.user_position[0] if self.user_position and (
self.start_pos[0] == self.user_position[0]
or self.user_scope is None or self.user_scope is None
and self.start_pos[0] >= self.user_position[0]): and self.start_pos[0] >= self.user_position[0]
):
debug.dbg('user scope found [%s] = %s' % debug.dbg('user scope found [%s] = %s' %
(self.parserline.replace('\n', ''), repr(self._scope))) (self.parserline.replace('\n', ''), repr(self._scope)))
self.user_scope = self._scope self.user_scope = self._scope
@@ -489,8 +500,9 @@ class Parser(object):
and not isinstance(self._scope, pr.SubModule): and not isinstance(self._scope, pr.SubModule):
self._scope = self.module self._scope = self.module
use_as_parent_scope = self.top_module if isinstance(self._scope, use_as_parent_scope = self.top_module if isinstance(
pr.SubModule) else self._scope self._scope, pr.SubModule
) else self._scope
first_pos = self.start_pos first_pos = self.start_pos
if tok == 'def': if tok == 'def':
func = self._parse_function() func = self._parse_function()

View File

@@ -898,7 +898,7 @@ class Statement(Simple):
c = token_iterator.current[1] c = token_iterator.current[1]
arr.end_pos = c.end_pos if isinstance(c, Simple) \ arr.end_pos = c.end_pos if isinstance(c, Simple) \
else (c[2][0], c[2][1] + len(c[1])) else c.end_pos
return arr, break_tok return arr, break_tok
def parse_stmt(token_iterator, maybe_dict=False, added_breaks=(), def parse_stmt(token_iterator, maybe_dict=False, added_breaks=(),
@@ -920,9 +920,10 @@ class Statement(Simple):
# it's not possible to set it earlier # it's not possible to set it earlier
tok.parent = self tok.parent = self
else: else:
token_type, tok, start_tok_pos = tok_temp tok = tok_temp.token
start_tok_pos = tok_temp.start_pos
last_end_pos = end_pos last_end_pos = end_pos
end_pos = start_tok_pos[0], start_tok_pos[1] + len(tok) end_pos = tok_temp.end_pos
if first: if first:
first = False first = False
start_pos = start_tok_pos start_pos = start_tok_pos
@@ -932,8 +933,12 @@ class Statement(Simple):
if lambd is not None: if lambd is not None:
token_list.append(lambd) token_list.append(lambd)
elif tok == 'for': elif tok == 'for':
list_comp, tok = parse_list_comp(token_iterator, list_comp, tok = parse_list_comp(
token_list, start_pos, last_end_pos) token_iterator,
token_list,
start_pos,
last_end_pos
)
if list_comp is not None: if list_comp is not None:
token_list = [list_comp] token_list = [list_comp]
@@ -944,9 +949,12 @@ class Statement(Simple):
if level == 0 and tok in closing_brackets \ if level == 0 and tok in closing_brackets \
or tok in added_breaks \ or tok in added_breaks \
or level == 1 and (tok == ',' or level == 1 and (
tok == ','
or maybe_dict and tok == ':' or maybe_dict and tok == ':'
or is_assignment(tok) and break_on_assignment): or is_assignment(tok)
and break_on_assignment
):
end_pos = end_pos[0], end_pos[1] - 1 end_pos = end_pos[0], end_pos[1] - 1
break break
token_list.append(tok_temp) token_list.append(tok_temp)
@@ -954,8 +962,14 @@ class Statement(Simple):
if not token_list: if not token_list:
return None, tok return None, tok
statement = stmt_class(self._sub_module, token_list, statement = stmt_class(
start_pos, end_pos, self.parent, set_name_parents=False) self._sub_module,
token_list,
start_pos,
end_pos,
self.parent,
set_name_parents=False
)
return statement, tok return statement, tok
def parse_lambda(token_iterator): def parse_lambda(token_iterator):
@@ -984,8 +998,9 @@ class Statement(Simple):
return lambd, tok return lambd, tok
def parse_list_comp(token_iterator, token_list, start_pos, end_pos): def parse_list_comp(token_iterator, token_list, start_pos, end_pos):
def parse_stmt_or_arr(token_iterator, added_breaks=(), def parse_stmt_or_arr(
names_are_set_vars=False): token_iterator, added_breaks=(), names_are_set_vars=False
):
stmt, tok = parse_stmt(token_iterator, stmt, tok = parse_stmt(token_iterator,
added_breaks=added_breaks) added_breaks=added_breaks)
if not stmt: if not stmt:
@@ -1039,12 +1054,16 @@ class Statement(Simple):
start_pos = tok.start_pos start_pos = tok.start_pos
end_pos = tok.end_pos end_pos = tok.end_pos
else: else:
token_type, tok, start_pos = tok_temp token_type = tok_temp.token_type
end_pos = start_pos[0], start_pos[1] + len(tok) tok = tok_temp.token
start_pos = tok_temp.start_pos
end_pos = tok_temp.end_pos
if is_assignment(tok): if is_assignment(tok):
# This means, there is an assignment here. # This means, there is an assignment here.
# Add assignments, which can be more than one # Add assignments, which can be more than one
self._assignment_details.append((result, tok)) self._assignment_details.append(
(result, tok_temp.token)
)
result = [] result = []
is_chain = False is_chain = False
continue continue
@@ -1072,8 +1091,9 @@ class Statement(Simple):
result.append(call) result.append(call)
is_chain = False is_chain = False
elif tok in brackets.keys(): elif tok in brackets.keys():
arr, is_ass = parse_array(token_iterator, brackets[tok], arr, is_ass = parse_array(
start_pos) token_iterator, brackets[tok], start_pos
)
if result and isinstance(result[-1], StatementElement): if result and isinstance(result[-1], StatementElement):
result[-1].set_execution(arr) result[-1].set_execution(arr)
else: else:
@@ -1098,8 +1118,14 @@ class Statement(Simple):
e = (t[2][0], t[2][1] + len(t[1])) \ e = (t[2][0], t[2][1] + len(t[1])) \
if isinstance(t, tuple) else t.start_pos if isinstance(t, tuple) else t.start_pos
stmt = Statement(self._sub_module, result, stmt = Statement(
start_pos, e, self.parent, set_name_parents=False) self._sub_module,
result,
start_pos,
e,
self.parent,
set_name_parents=False
)
stmt._commands = result stmt._commands = result
arr, break_tok = parse_array(token_iterator, Array.TUPLE, arr, break_tok = parse_array(token_iterator, Array.TUPLE,
stmt.start_pos, stmt) stmt.start_pos, stmt)

128
jedi/parser/token.py Normal file
View File

@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*-
""" Efficient representation of tokens
We want to have a token_list and start_position for everything the
tokenizer returns. Therefore we need a memory efficient class. We
found that a flat object with slots is the best.
"""
from jedi._compatibility import utf8, unicode
class Token(object):
    """The token object is an efficient representation of the structure
    (token_type, token, (start_pos_line, start_pos_col)). It has indexer
    methods that maintain compatibility to existing code that expects the
    above tuple structure.

    >>> tuple(Token(1, 2, 3, 4))
    (1, 2, (3, 4))
    >>> unicode(Token(1, "test", 1, 1)) == "test"
    True
    >>> repr(Token(1, "test", 1, 1))
    "<Token: (1, 'test', (1, 1))>"
    >>> Token(1, 2, 3, 4).__getstate__()
    (1, 2, 3, 4)
    >>> a = Token(0, 0, 0, 0)
    >>> a.__setstate__((1, 2, 3, 4))
    >>> a
    <Token: (1, 2, (3, 4))>
    >>> a.start_pos
    (3, 4)
    >>> a.token
    2
    >>> a.start_pos_col
    4
    >>> Token.from_tuple((6, 5, (4, 3)))
    <Token: (6, 5, (4, 3))>
    >>> unicode(Token(1, utf8("😷"), 1 ,1)) + "p" == utf8("😷p")
    True
    """
    __slots__ = [
        "_token_type", "_token", "_start_pos_line", "_start_pos_col"
    ]

    @classmethod
    def from_tuple(cls, tp):
        """Alternate constructor from the legacy tuple layout
        ``(token_type, token, (start_pos_line, start_pos_col))``.
        """
        # Use ``cls`` (not the hard-coded class name) so that subclasses
        # get instances of themselves back from this factory.
        return cls(tp[0], tp[1], tp[2][0], tp[2][1])

    def __init__(
        self, token_type, token, start_pos_line, start_pos_col
    ):
        self._token_type = token_type
        self._token = token
        self._start_pos_line = start_pos_line
        self._start_pos_col = start_pos_col

    def __repr__(self):
        return "<%s: %s>" % (type(self).__name__, tuple(self))

    # Backward compatibility py2
    def __unicode__(self):
        return unicode(self.token)

    # Backward compatibility py3
    def __str__(self):
        return unicode(self.token)

    # Backward compatibility: emulate the old tuple layout.  Raising
    # IndexError past index 2 also terminates ``tuple(token)`` iteration.
    def __getitem__(self, key):
        # Builds the same structure as tuple used to have
        if key == 0:
            return self.token_type
        elif key == 1:
            return self.token
        elif key == 2:
            return (self.start_pos_line, self.start_pos_col)
        else:
            raise IndexError("list index out of range")

    @property
    def token_type(self):
        return self._token_type

    @property
    def token(self):
        return self._token

    @property
    def start_pos_line(self):
        return self._start_pos_line

    @property
    def start_pos_col(self):
        return self._start_pos_col

    # Backward compatibility
    @property
    def start_pos(self):
        return (self.start_pos_line, self.start_pos_col)

    @property
    def end_pos(self):
        """Returns end position respecting multiline tokens."""
        end_pos_line = self.start_pos_line
        lines = unicode(self).split('\n')
        end_pos_line += len(lines) - 1
        end_pos_col = self.start_pos_col
        # Check for multiline token
        if self.start_pos_line == end_pos_line:
            # Single-line token: the end column advances from the start.
            end_pos_col += len(lines[-1])
        else:
            # Multiline token: the last line starts at column 0.
            end_pos_col = len(lines[-1])
        return (end_pos_line, end_pos_col)

    # Make cache footprint smaller for faster unpickling
    def __getstate__(self):
        return (
            self.token_type,
            self.token,
            self.start_pos_line,
            self.start_pos_col,
        )

    def __setstate__(self, state):
        self._token_type = state[0]
        self._token = state[1]
        self._start_pos_line = state[2]
        self._start_pos_col = state[3]

View File

@@ -1,6 +1,7 @@
from jedi.parser import Parser from jedi.parser import Parser
from jedi.parser import representation as pr from jedi.parser import representation as pr
def test_user_statement_on_import(): def test_user_statement_on_import():
"""github #285""" """github #285"""
s = "from datetime import (\n" \ s = "from datetime import (\n" \
@@ -9,7 +10,7 @@ def test_user_statement_on_import():
for pos in [(2, 1), (2, 4)]: for pos in [(2, 1), (2, 4)]:
u = Parser(s, user_position=pos).user_stmt u = Parser(s, user_position=pos).user_stmt
assert isinstance(u, pr.Import) assert isinstance(u, pr.Import)
assert u.defunct == False assert u.defunct is False
assert [str(n) for n in u.get_defined_names()] == ['time'] assert [str(n) for n in u.get_defined_names()] == ['time']
@@ -47,6 +48,7 @@ class TestCallAndName():
assert isinstance(literal, pr.String) assert isinstance(literal, pr.String)
assert literal.value == 'hello' assert literal.value == 'hello'
class TestSubscopes(): class TestSubscopes():
def get_sub(self, source): def get_sub(self, source):
return Parser(source).module.subscopes[0] return Parser(source).module.subscopes[0]
@@ -62,6 +64,7 @@ class TestSubscopes():
assert name.end_pos == (1, len('def foo')) assert name.end_pos == (1, len('def foo'))
assert str(name) == 'foo' assert str(name) == 'foo'
class TestImports(): class TestImports():
def get_import(self, source): def get_import(self, source):
return Parser(source).module.imports[0] return Parser(source).module.imports[0]

25
test/test_token.py Normal file
View File

@@ -0,0 +1,25 @@
import jedi.parser as parser
try:
import unittest2 as unittest
except ImportError: # pragma: no cover
import unittest
class TokenTest(unittest.TestCase):
    """Checks ``Token.end_pos`` for single- and multi-line string tokens."""

    def _string_token(self, source):
        # Parse ``source`` and pull out the string literal: it is the third
        # entry (name, '=', string) of the first statement's token list.
        parsed = parser.Parser(source)
        return parsed.top_module.subscopes[0].statements[0].token_list[2]

    def test_end_pos_one_line(self):
        tok = self._string_token('''
def testit():
    a = "huhu"
''')
        self.assertEqual(tok.end_pos, (3, 14))

    def test_end_pos_multi_line(self):
        tok = self._string_token('''
def testit():
    a = """huhu
asdfasdf""" + "h"
''')
        self.assertEqual(tok.end_pos, (4, 11))