diff --git a/jedi/cache.py b/jedi/cache.py
index 69d5e972..49ff3fda 100644
--- a/jedi/cache.py
+++ b/jedi/cache.py
@@ -220,7 +220,7 @@ def save_module(path, name, parser, pickling=True):
 
 
 class _ModulePickling(object):
-    version = 4
+    version = 5
     """
     Version number (integer) for file system cache.
 
@@ -262,7 +262,7 @@ class _ModulePickling(object):
                 parser_cache_item = pickle.load(f)
             finally:
                 gc.enable()
-
+        debug.dbg('pickle loaded', path)
         parser_cache[path] = parser_cache_item
         return parser_cache_item.parser
 
diff --git a/jedi/common.py b/jedi/common.py
index a7a4d8b5..929c8497 100644
--- a/jedi/common.py
+++ b/jedi/common.py
@@ -146,8 +146,13 @@ class NoErrorTokenizer(object):
             if self.is_fast_parser \
                     and self.previous[0] in (tokenize.INDENT, tokenize.NL, None,
                                              tokenize.NEWLINE, tokenize.DEDENT) \
-                    and c[0] not in (tokenize.COMMENT, tokenize.INDENT,
-                                     tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT):
+                    and c[0] not in (
+                        tokenize.COMMENT,
+                        tokenize.INDENT,
+                        tokenize.NL,
+                        tokenize.NEWLINE,
+                        tokenize.DEDENT
+                    ):
                 # print c, tokenize.tok_name[c[0]]
                 tok = c[1]
diff --git a/jedi/interpret.py b/jedi/interpret.py
index dadf1b28..2a7bc85d 100644
--- a/jedi/interpret.py
+++ b/jedi/interpret.py
@@ -6,6 +6,7 @@ import itertools
 import tokenize
 
 from jedi.parser import representation as pr
+from jedi.parser import token
 
 
 class ObjectImporter(object):
@@ -156,11 +157,13 @@ class ObjectImporter(object):
                           names=[(rhs, (0, 0))],
                           start_pos=(0, 0),
                           end_pos=(None, None))
-        token_list = [lhsname, (tokenize.OP, '=', (0, 0)), rhsname]
+        token_list = [lhsname, token.Token.from_tuple(
+            (tokenize.OP, '=', (0, 0))
+        ), rhsname]
         if call:
             token_list.extend([
-                (tokenize.OP, '(', (0, 0)),
-                (tokenize.OP, ')', (0, 0)),
+                token.Token.from_tuple((tokenize.OP, '(', (0, 0))),
+                token.Token.from_tuple((tokenize.OP, ')', (0, 0))),
             ])
         return pr.Statement(
             module=submodule,
diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py
index 8a656e28..3e6c7f9c 100644
--- a/jedi/parser/__init__.py
+++ b/jedi/parser/__init__.py
@@ -24,6 +24,7 @@ from jedi._compatibility import next, StringIO
 from jedi import debug
 from jedi import common
 from jedi.parser import representation as pr
+from jedi.parser import token as token_pr
 
 
 class Parser(object):
@@ -271,8 +272,11 @@ class Parser(object):
         first_pos = self.start_pos
         token_type, cname = self.next()
         if token_type != tokenize.NAME:
-            debug.warning("class: syntax err, token is not a name@%s (%s: %s)"
-                          % (self.start_pos[0], tokenize.tok_name[token_type], cname))
+            debug.warning(
+                "class: syntax err, token is not a name@%s (%s: %s)" % (
+                    self.start_pos[0], tokenize.tok_name[token_type], cname
+                )
+            )
             return None
 
         cname = pr.Name(self.module, [(cname, self.start_pos)], self.start_pos,
@@ -345,11 +349,17 @@ class Parser(object):
                    or tok in breaks and level <= 0):
             try:
                 # print 'parse_stmt', tok, tokenize.tok_name[token_type]
-                tok_list.append(self._current + (self.start_pos,))
+                tok_list.append(
+                    token_pr.Token.from_tuple(
+                        self._current + (self.start_pos,)
+                    )
+                )
                 if tok == 'as':
                     token_type, tok = self.next()
                     if token_type == tokenize.NAME:
-                        n, token_type, tok = self._parse_dot_name(self._current)
+                        n, token_type, tok = self._parse_dot_name(
+                            self._current
+                        )
                         if n:
                             set_vars.append(n)
                             as_names.append(n)
@@ -382,21 +392,20 @@ class Parser(object):
 
         first_tok = tok_list[0]
         # docstrings
         if len(tok_list) == 1 and not isinstance(first_tok, pr.Name) \
-                and first_tok[0] == tokenize.STRING:
+                and first_tok.token_type == tokenize.STRING:
             # Normal docstring check
             if self.freshscope and not self.no_docstr:
-                self._scope.add_docstr(first_tok[1])
+                self._scope.add_docstr(first_tok.token)
                 return None, tok
 
             # Attribute docstring (PEP 224) support (sphinx uses it, e.g.)
             # If string literal is being parsed...
-            elif first_tok[0] == tokenize.STRING:
+            elif first_tok.token_type == tokenize.STRING:
                 with common.ignored(IndexError, AttributeError):
                     # ...then set it as a docstring
-                    self._scope.statements[-1].add_docstr(first_tok[1])
+                    self._scope.statements[-1].add_docstr(first_tok.token)
                 return None, tok
-
         stmt = stmt_class(self.module, tok_list, first_pos, self.end_pos,
                           as_names=as_names,
                           names_are_set_vars=names_are_set_vars)
@@ -435,9 +444,11 @@ class Parser(object):
                     s = s.parent
                 raise
 
-        if self.user_position and (self.start_pos[0] == self.user_position[0]
-                or self.user_scope is None
-                and self.start_pos[0] >= self.user_position[0]):
+        if self.user_position and (
+            self.start_pos[0] == self.user_position[0]
+            or self.user_scope is None
+            and self.start_pos[0] >= self.user_position[0]
+        ):
             debug.dbg('user scope found [%s] = %s' %
                       (self.parserline.replace('\n', ''), repr(self._scope)))
             self.user_scope = self._scope
@@ -489,8 +500,9 @@ class Parser(object):
                     and not isinstance(self._scope, pr.SubModule):
                 self._scope = self.module
 
-            use_as_parent_scope = self.top_module if isinstance(self._scope,
-                                                                pr.SubModule) else self._scope
+            use_as_parent_scope = self.top_module if isinstance(
+                self._scope, pr.SubModule
+            ) else self._scope
             first_pos = self.start_pos
             if tok == 'def':
                 func = self._parse_function()
diff --git a/jedi/parser/representation.py b/jedi/parser/representation.py
index e82773ca..c0e931f6 100644
--- a/jedi/parser/representation.py
+++ b/jedi/parser/representation.py
@@ -898,7 +898,7 @@ class Statement(Simple):
                 c = token_iterator.current[1]
                 arr.end_pos = c.end_pos if isinstance(c, Simple) \
-                    else (c[2][0], c[2][1] + len(c[1]))
+                    else c.end_pos
             return arr, break_tok
 
         def parse_stmt(token_iterator, maybe_dict=False, added_breaks=(),
@@ -920,9 +920,10 @@ class Statement(Simple):
                     # it's not possible to set it earlier
                     tok.parent = self
                 else:
-                    token_type, tok, start_tok_pos = tok_temp
-                    last_end_pos = end_pos
-                    end_pos = start_tok_pos[0], start_tok_pos[1] + len(tok)
+                    tok = tok_temp.token
+                    start_tok_pos = tok_temp.start_pos
+                    last_end_pos = end_pos
+                    end_pos = tok_temp.end_pos
                 if first:
                     first = False
                     start_pos = start_tok_pos
@@ -932,8 +933,12 @@ class Statement(Simple):
                     if lambd is not None:
                         token_list.append(lambd)
                 elif tok == 'for':
-                    list_comp, tok = parse_list_comp(token_iterator,
-                        token_list, start_pos, last_end_pos)
+                    list_comp, tok = parse_list_comp(
+                        token_iterator,
+                        token_list,
+                        start_pos,
+                        last_end_pos
+                    )
 
                     if list_comp is not None:
                         token_list = [list_comp]
@@ -944,9 +949,12 @@ class Statement(Simple):
 
                 if level == 0 and tok in closing_brackets \
                         or tok in added_breaks \
-                        or level == 1 and (tok == ','
-                            or maybe_dict and tok == ':'
-                            or is_assignment(tok) and break_on_assignment):
+                        or level == 1 and (
+                            tok == ','
+                            or maybe_dict and tok == ':'
+                            or is_assignment(tok)
+                            and break_on_assignment
+                        ):
                     end_pos = end_pos[0], end_pos[1] - 1
                     break
                 token_list.append(tok_temp)
@@ -954,8 +962,14 @@ class Statement(Simple):
             if not token_list:
                 return None, tok
 
-            statement = stmt_class(self._sub_module, token_list,
-                                   start_pos, end_pos, self.parent, set_name_parents=False)
+            statement = stmt_class(
+                self._sub_module,
+                token_list,
+                start_pos,
+                end_pos,
+                self.parent,
+                set_name_parents=False
+            )
             return statement, tok
 
         def parse_lambda(token_iterator):
@@ -984,8 +998,9 @@ class Statement(Simple):
             return lambd, tok
 
         def parse_list_comp(token_iterator, token_list, start_pos, end_pos):
-            def parse_stmt_or_arr(token_iterator, added_breaks=(),
-                                  names_are_set_vars=False):
+            def parse_stmt_or_arr(
+                token_iterator, added_breaks=(), names_are_set_vars=False
+            ):
                 stmt, tok = parse_stmt(token_iterator,
                                        added_breaks=added_breaks)
                 if not stmt:
@@ -1039,12 +1054,16 @@ class Statement(Simple):
                     start_pos = tok.start_pos
                     end_pos = tok.end_pos
                 else:
-                    token_type, tok, start_pos = tok_temp
-                    end_pos = start_pos[0], start_pos[1] + len(tok)
+                    token_type = tok_temp.token_type
+                    tok = tok_temp.token
+                    start_pos = tok_temp.start_pos
+                    end_pos = tok_temp.end_pos
                     if is_assignment(tok):
                         # This means, there is an assignment here.
                         # Add assignments, which can be more than one
-                        self._assignment_details.append((result, tok))
+                        self._assignment_details.append(
+                            (result, tok_temp.token)
+                        )
                         result = []
                         is_chain = False
                         continue
@@ -1072,8 +1091,9 @@ class Statement(Simple):
                     result.append(call)
                 is_chain = False
             elif tok in brackets.keys():
-                arr, is_ass = parse_array(token_iterator, brackets[tok],
-                                          start_pos)
+                arr, is_ass = parse_array(
+                    token_iterator, brackets[tok], start_pos
+                )
                 if result and isinstance(result[-1], StatementElement):
                     result[-1].set_execution(arr)
                 else:
@@ -1098,8 +1118,14 @@ class Statement(Simple):
                 e = (t[2][0], t[2][1] + len(t[1])) \
                     if isinstance(t, tuple) else t.start_pos
-                stmt = Statement(self._sub_module, result,
-                                 start_pos, e, self.parent, set_name_parents=False)
+                stmt = Statement(
+                    self._sub_module,
+                    result,
+                    start_pos,
+                    e,
+                    self.parent,
+                    set_name_parents=False
+                )
                 stmt._commands = result
                 arr, break_tok = parse_array(token_iterator, Array.TUPLE,
                                              stmt.start_pos, stmt)
diff --git a/jedi/parser/token.py b/jedi/parser/token.py
new file mode 100644
index 00000000..40e60eee
--- /dev/null
+++ b/jedi/parser/token.py
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+""" Efficient representation of tokens
+
+We want to have a token_list and start_position for everything the
+tokenizer returns. Therefore we need a memory efficient class. We
+found that a flat object with slots is the best.
+"""
+
+from jedi._compatibility import utf8, unicode
+
+
+class Token(object):
+    """The token object is an efficient representation of the structure
+    (token_type, token, (start_pos_line, start_pos_col)). It has indexer
+    methods that maintain compatibility to existing code that expects the above
+    structure.
+
+    >>> tuple(Token(1,2,3,4))
+    (1, 2, (3, 4))
+    >>> unicode(Token(1, "test", 1, 1)) == "test"
+    True
+    >>> repr(Token(1, "test", 1, 1))
+    "<Token: (1, 'test', (1, 1))>"
+    >>> Token(1, 2, 3, 4).__getstate__()
+    (1, 2, 3, 4)
+    >>> a = Token(0, 0, 0, 0)
+    >>> a.__setstate__((1, 2, 3, 4))
+    >>> a
+    <Token: (1, 2, (3, 4))>
+    >>> a.start_pos
+    (3, 4)
+    >>> a.token
+    2
+    >>> a.start_pos_col
+    4
+    >>> Token.from_tuple((6, 5, (4, 3)))
+    <Token: (6, 5, (4, 3))>
+    >>> unicode(Token(1, utf8("😷"), 1 ,1)) + "p" == utf8("😷p")
+    True
+    """
+    __slots__ = [
+        "_token_type", "_token", "_start_pos_line", "_start_pos_col"
+    ]
+
+    @classmethod
+    def from_tuple(cls, tp):
+        return Token(tp[0], tp[1], tp[2][0], tp[2][1])
+
+    def __init__(
+        self, token_type, token, start_pos_line, start_pos_col
+    ):
+        self._token_type = token_type
+        self._token = token
+        self._start_pos_line = start_pos_line
+        self._start_pos_col = start_pos_col
+
+    def __repr__(self):
+        return "<%s: %s>" % (type(self).__name__, tuple(self))
+
+    # Backward compatibility py2
+    def __unicode__(self):
+        return unicode(self.token)
+
+    # Backward compatibility py3
+    def __str__(self):
+        return unicode(self.token)
+
+    # Backward compatibility
+    def __getitem__(self, key):
+        # Builds the same structure as tuple used to have
+        if key == 0:
+            return self.token_type
+        elif key == 1:
+            return self.token
+        elif key == 2:
+            return (self.start_pos_line, self.start_pos_col)
+        else:
+            raise IndexError("list index out of range")
+
+    @property
+    def token_type(self):
+        return self._token_type
+
+    @property
+    def token(self):
+        return self._token
+
+    @property
+    def start_pos_line(self):
+        return self._start_pos_line
+
+    @property
+    def start_pos_col(self):
+        return self._start_pos_col
+
+    # Backward compatibility
+    @property
+    def start_pos(self):
+        return (self.start_pos_line, self.start_pos_col)
+
+    @property
+    def end_pos(self):
+        """Returns end position respecting multiline tokens."""
+        end_pos_line = self.start_pos_line
+        lines = unicode(self).split('\n')
+        end_pos_line += len(lines) - 1
+        end_pos_col = self.start_pos_col
+        # Check for multiline token
+        if self.start_pos_line == end_pos_line:
+            end_pos_col += len(lines[-1])
+        else:
+            end_pos_col = len(lines[-1])
+        return (end_pos_line, end_pos_col)
+
+    # Make cache footprint smaller for faster unpickling
+    def __getstate__(self):
+        return (
+            self.token_type,
+            self.token,
+            self.start_pos_line,
+            self.start_pos_col,
+        )
+
+    def __setstate__(self, state):
+        self._token_type = state[0]
+        self._token = state[1]
+        self._start_pos_line = state[2]
+        self._start_pos_col = state[3]
diff --git a/test/test_parsing.py b/test/test_parsing.py
index 2894d5f1..07336afe 100644
--- a/test/test_parsing.py
+++ b/test/test_parsing.py
@@ -1,6 +1,7 @@
 from jedi.parser import Parser
 from jedi.parser import representation as pr
 
+
 def test_user_statement_on_import():
     """github #285"""
     s = "from datetime import (\n" \
@@ -9,7 +10,7 @@ def test_user_statement_on_import():
     for pos in [(2, 1), (2, 4)]:
         u = Parser(s, user_position=pos).user_stmt
         assert isinstance(u, pr.Import)
-        assert u.defunct == False
+        assert u.defunct is False
         assert [str(n) for n in u.get_defined_names()] == ['time']
 
 
@@ -47,6 +48,7 @@ class TestCallAndName():
         assert isinstance(literal, pr.String)
         assert literal.value == 'hello'
 
+
 class TestSubscopes():
     def get_sub(self, source):
         return Parser(source).module.subscopes[0]
@@ -62,6 +64,7 @@ class TestSubscopes():
         assert name.end_pos == (1, len('def foo'))
         assert str(name) == 'foo'
 
+
 class TestImports():
     def get_import(self, source):
         return Parser(source).module.imports[0]
diff --git a/test/test_token.py b/test/test_token.py
new file mode 100644
index 00000000..9b7b1a3f
--- /dev/null
+++ b/test/test_token.py
@@ -0,0 +1,25 @@
+import jedi.parser as parser
+
+try:
+    import unittest2 as unittest
+except ImportError:  # pragma: no cover
+    import unittest
+
+
+class TokenTest(unittest.TestCase):
+    def test_end_pos_one_line(self):
+        parsed = parser.Parser('''
+def testit():
+    a = "huhu"
+''')
+        tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
+        self.assertEqual(tok.end_pos, (3, 14))
+
+    def test_end_pos_multi_line(self):
+        parsed = parser.Parser('''
+def testit():
+    a = """huhu
+asdfasdf""" + "h"
+''')
+        tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
+        self.assertEqual(tok.end_pos, (4, 11))
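
Reviewer note (not part of the patch): a minimal usage sketch of the new jedi.parser.token.Token class, assuming the patch is applied and the module is importable. It shows the tuple-style backward compatibility kept by __getitem__ and the multiline-aware end_pos that test_token.py exercises; the positions below are made up for illustration.

    import tokenize

    from jedi.parser.token import Token

    # Built the same way interpret.py now builds operator tokens.
    tok = Token.from_tuple((tokenize.OP, '=', (1, 4)))

    assert tok[0] == tokenize.OP      # old tuple-style indexing still works
    assert tok[1] == '='              # the raw token string
    assert tok[2] == (1, 4)           # start position as a (line, col) pair
    assert tok.start_pos == (1, 4)    # new attribute access
    assert tok.end_pos == (1, 5)      # same line: start_pos_col + len('=')

    # For multiline tokens, end_pos moves to the last line of the token,
    # which is what test_end_pos_multi_line checks through the parser.
    multiline = Token(tokenize.STRING, '"""huhu\nasdfasdf"""', 3, 8)
    assert multiline.end_pos == (4, 11)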