From 989e12e8a7546c9cd2c03f856502a546c2c3a942 Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Thu, 5 Dec 2013 23:34:20 +0100 Subject: [PATCH 1/8] * Replaced token tuple with token class * Fixed PEP8 where I read code --- jedi/cache.py | 2 +- jedi/common.py | 9 +++++++-- jedi/parser/__init__.py | 32 ++++++++++++++++++++++---------- test/test_parsing.py | 5 ++++- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/jedi/cache.py b/jedi/cache.py index 69d5e972..ef9a94b9 100644 --- a/jedi/cache.py +++ b/jedi/cache.py @@ -220,7 +220,7 @@ def save_module(path, name, parser, pickling=True): class _ModulePickling(object): - version = 4 + version = 5 """ Version number (integer) for file system cache. diff --git a/jedi/common.py b/jedi/common.py index a7a4d8b5..929c8497 100644 --- a/jedi/common.py +++ b/jedi/common.py @@ -146,8 +146,13 @@ class NoErrorTokenizer(object): if self.is_fast_parser \ and self.previous[0] in (tokenize.INDENT, tokenize.NL, None, tokenize.NEWLINE, tokenize.DEDENT) \ - and c[0] not in (tokenize.COMMENT, tokenize.INDENT, - tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT): + and c[0] not in ( + tokenize.COMMENT, + tokenize.INDENT, + tokenize.NL, + tokenize.NEWLINE, + tokenize.DEDENT + ): # print c, tokenize.tok_name[c[0]] tok = c[1] diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py index 8a656e28..96256186 100644 --- a/jedi/parser/__init__.py +++ b/jedi/parser/__init__.py @@ -24,6 +24,7 @@ from jedi._compatibility import next, StringIO from jedi import debug from jedi import common from jedi.parser import representation as pr +from jedi.parser import token as token_pr class Parser(object): @@ -271,8 +272,11 @@ class Parser(object): first_pos = self.start_pos token_type, cname = self.next() if token_type != tokenize.NAME: - debug.warning("class: syntax err, token is not a name@%s (%s: %s)" - % (self.start_pos[0], tokenize.tok_name[token_type], cname)) + debug.warning( + "class: syntax err, token is not a name@%s (%s: %s)" % ( + self.start_pos[0], tokenize.tok_name[token_type], cname + ) + ) return None cname = pr.Name(self.module, [(cname, self.start_pos)], self.start_pos, @@ -345,11 +349,17 @@ class Parser(object): or tok in breaks and level <= 0): try: # print 'parse_stmt', tok, tokenize.tok_name[token_type] - tok_list.append(self._current + (self.start_pos,)) + tok_list.append( + token_pr.Token.from_tuple( + self._current + (self.start_pos,) + ) + ) if tok == 'as': token_type, tok = self.next() if token_type == tokenize.NAME: - n, token_type, tok = self._parse_dot_name(self._current) + n, token_type, tok = self._parse_dot_name( + self._current + ) if n: set_vars.append(n) as_names.append(n) @@ -396,7 +406,6 @@ class Parser(object): self._scope.statements[-1].add_docstr(first_tok[1]) return None, tok - stmt = stmt_class(self.module, tok_list, first_pos, self.end_pos, as_names=as_names, names_are_set_vars=names_are_set_vars) @@ -435,9 +444,11 @@ class Parser(object): s = s.parent raise - if self.user_position and (self.start_pos[0] == self.user_position[0] - or self.user_scope is None - and self.start_pos[0] >= self.user_position[0]): + if self.user_position and ( + self.start_pos[0] == self.user_position[0] + or self.user_scope is None + and self.start_pos[0] >= self.user_position[0] + ): debug.dbg('user scope found [%s] = %s' % (self.parserline.replace('\n', ''), repr(self._scope))) self.user_scope = self._scope @@ -489,8 +500,9 @@ class Parser(object): and not isinstance(self._scope, pr.SubModule): self._scope = self.module - use_as_parent_scope 
= self.top_module if isinstance(self._scope, - pr.SubModule) else self._scope + use_as_parent_scope = self.top_module if isinstance( + self._scope, pr.SubModule + ) else self._scope first_pos = self.start_pos if tok == 'def': func = self._parse_function() diff --git a/test/test_parsing.py b/test/test_parsing.py index 2894d5f1..07336afe 100644 --- a/test/test_parsing.py +++ b/test/test_parsing.py @@ -1,6 +1,7 @@ from jedi.parser import Parser from jedi.parser import representation as pr + def test_user_statement_on_import(): """github #285""" s = "from datetime import (\n" \ @@ -9,7 +10,7 @@ def test_user_statement_on_import(): for pos in [(2, 1), (2, 4)]: u = Parser(s, user_position=pos).user_stmt assert isinstance(u, pr.Import) - assert u.defunct == False + assert u.defunct is False assert [str(n) for n in u.get_defined_names()] == ['time'] @@ -47,6 +48,7 @@ class TestCallAndName(): assert isinstance(literal, pr.String) assert literal.value == 'hello' + class TestSubscopes(): def get_sub(self, source): return Parser(source).module.subscopes[0] @@ -62,6 +64,7 @@ class TestSubscopes(): assert name.end_pos == (1, len('def foo')) assert str(name) == 'foo' + class TestImports(): def get_import(self, source): return Parser(source).module.imports[0] From 099a6c26979767db96b2b78ab39a1742a7d2a0c0 Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Thu, 5 Dec 2013 23:59:48 +0100 Subject: [PATCH 2/8] * added token.py --- jedi/cache.py | 2 +- jedi/parser/token.py | 78 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 jedi/parser/token.py diff --git a/jedi/cache.py b/jedi/cache.py index ef9a94b9..49ff3fda 100644 --- a/jedi/cache.py +++ b/jedi/cache.py @@ -262,7 +262,7 @@ class _ModulePickling(object): parser_cache_item = pickle.load(f) finally: gc.enable() - + debug.dbg('pickle loaded', path) parser_cache[path] = parser_cache_item return parser_cache_item.parser diff --git a/jedi/parser/token.py b/jedi/parser/token.py new file mode 100644 index 00000000..34f78c5b --- /dev/null +++ b/jedi/parser/token.py @@ -0,0 +1,78 @@ +""" Efficient representation of tokens + +We want to have a token_list and start_position for everything the +tokenizer returns. Therefore we need a memory efficient class. We +found that a flat object with slots is the best. The Token object is +that plus indexing and string backwards compatibility. 
+ +""" + +class Token(object): + __slots__ = [ + "token_type", "token", "start_pos_line", "start_pos_col" + ] + + @classmethod + def from_tuple(cls, tp): + return Token(tp[0], tp[1], tp[2][0], tp[2][1]) + + def __init__( + self, token_type, token, start_pos_line, start_pos_col + ): + self.token_type = token_type + self.token = token + self.start_pos_line = start_pos_line + self.start_pos_col = start_pos_col + + def __repr__(self): + return "<%s: %s>" % (type(self).__name__, tuple(self)) + + # Backward compatibility + def __str__(self): + return str(self.token) + + # Backward compatibility + def __getitem__(self, key): + # Builds the same structure as tuple used to have + if key == 0: + return self.token_type + elif key == 1: + return self.token + elif key == 2: + return (self.start_pos_line, self.start_pos_col) + else: + raise IndexError() + + # Backward compatibility + def __setitem__(self, key, value): + # setitem analogous to + if key == 0: + self.token_type = value + elif key == 1: + self.token = value + elif key == 2: + self.start_pos_line = value[0] + self.start_pos_col = value[1] + else: + raise IndexError() + + # Backward compatibility + def __getattr__(self, attr): + # Expose the missing start_pos attribute + if attr == "start_pos": + return (self.start_pos_line, self.start_pos_col) + else: + raise AttributeError( + "type '%s' has no attriubte '%s'" % ( + type(self), + attr + ) + ) + + # Make cache footprint smaller + def __getstate__(self): + return tuple(self) + + def __setstate__(self, state): + for i in range(len(state)): + self[i] = state[i] From ced926b7f057a281c613f39bfd86df58d2ce41f3 Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Fri, 6 Dec 2013 00:11:38 +0100 Subject: [PATCH 3/8] * cleanup --- jedi/parser/token.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jedi/parser/token.py b/jedi/parser/token.py index 34f78c5b..77dbe097 100644 --- a/jedi/parser/token.py +++ b/jedi/parser/token.py @@ -34,7 +34,7 @@ class Token(object): # Backward compatibility def __getitem__(self, key): # Builds the same structure as tuple used to have - if key == 0: + if key == 0: return self.token_type elif key == 1: return self.token @@ -46,7 +46,7 @@ class Token(object): # Backward compatibility def __setitem__(self, key, value): # setitem analogous to - if key == 0: + if key == 0: self.token_type = value elif key == 1: self.token = value From f1c49db6b6cbace2891f1b954f4fb66f9a30903d Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Fri, 6 Dec 2013 02:51:28 +0100 Subject: [PATCH 4/8] * Added doctests to Token class * Added __setattr__ for symmetry * Code cleanup --- jedi/parser/token.py | 75 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 16 deletions(-) diff --git a/jedi/parser/token.py b/jedi/parser/token.py index 77dbe097..4d83930a 100644 --- a/jedi/parser/token.py +++ b/jedi/parser/token.py @@ -2,12 +2,45 @@ We want to have a token_list and start_position for everything the tokenizer returns. Therefore we need a memory efficient class. We -found that a flat object with slots is the best. The Token object is -that plus indexing and string backwards compatibility. - +found that a flat object with slots is the best. """ + class Token(object): + """The token object is an efficient representation of the structure + (token_type, token, (start_pos_line, start_pos_col)). It has indexer + methods that maintain compatibility to existing code that expects the above + structure. 
+ + >>> tuple(Token(1,2,3,4)) + (1, 2, (3, 4)) + >>> str(Token(1, "test", 1, 1)) + 'test' + >>> repr(Token(1, "test", 1, 1)) + "" + >>> Token(1, 2, 3, 4).__getstate__() + (1, 2, 3, 4) + >>> a = Token(0, 0, 0, 0) + >>> a.__setstate__((1, 2, 3, 4)) + >>> a + + >>> a[2] = (2, 1) + >>> a + + >>> a.start_pos + (2, 1) + >>> a.token + 2 + >>> a.start_pos = (3, 4) + >>> a + + >>> a.start_pos + (3, 4) + >>> a.start_pos_col + 4 + >>> Token.from_tuple((6, 5, (4, 3))) + + """ __slots__ = [ "token_type", "token", "start_pos_line", "start_pos_col" ] @@ -41,11 +74,11 @@ class Token(object): elif key == 2: return (self.start_pos_line, self.start_pos_col) else: - raise IndexError() + raise IndexError("list index out of range") # Backward compatibility def __setitem__(self, key, value): - # setitem analogous to + # setitem analogous to getitem if key == 0: self.token_type = value elif key == 1: @@ -54,7 +87,7 @@ class Token(object): self.start_pos_line = value[0] self.start_pos_col = value[1] else: - raise IndexError() + raise IndexError("list index out of range") # Backward compatibility def __getattr__(self, attr): @@ -62,17 +95,27 @@ class Token(object): if attr == "start_pos": return (self.start_pos_line, self.start_pos_col) else: - raise AttributeError( - "type '%s' has no attriubte '%s'" % ( - type(self), - attr - ) - ) + return object.__getattr__(self, attr) - # Make cache footprint smaller + def __setattr__(self, attr, value): + # setattr analogous to getattr for symmetry + if attr == "start_pos": + self.start_pos_line = value[0] + self.start_pos_col = value[1] + else: + object.__setattr__(self, attr, value) + + # Make cache footprint smaller for faster unpickling def __getstate__(self): - return tuple(self) + return ( + self.token_type, + self.token, + self.start_pos_line, + self.start_pos_col, + ) def __setstate__(self, state): - for i in range(len(state)): - self[i] = state[i] + self.token_type = state[0] + self.token = state[1] + self.start_pos_line = state[2] + self.start_pos_col = state[3] From 5c543ac364fe6041a4c6ee6a5632fc180ced819f Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Sat, 7 Dec 2013 22:50:20 +0100 Subject: [PATCH 5/8] * unicode compatibility --- jedi/parser/token.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/jedi/parser/token.py b/jedi/parser/token.py index 4d83930a..6c025a80 100644 --- a/jedi/parser/token.py +++ b/jedi/parser/token.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ Efficient representation of tokens We want to have a token_list and start_position for everything the @@ -5,6 +6,8 @@ tokenizer returns. Therefore we need a memory efficient class. We found that a flat object with slots is the best. 
""" +from jedi._compatibility import unicode + class Token(object): """The token object is an efficient representation of the structure @@ -14,8 +17,8 @@ class Token(object): >>> tuple(Token(1,2,3,4)) (1, 2, (3, 4)) - >>> str(Token(1, "test", 1, 1)) - 'test' + >>> unicode(Token(1, "test", 1, 1)) == "test" + True >>> repr(Token(1, "test", 1, 1)) "" >>> Token(1, 2, 3, 4).__getstate__() @@ -40,6 +43,8 @@ class Token(object): 4 >>> Token.from_tuple((6, 5, (4, 3))) + >>> unicode(Token(1, u"😷", 1 ,1)) + "p" == u"😷p" + True """ __slots__ = [ "token_type", "token", "start_pos_line", "start_pos_col" @@ -60,9 +65,13 @@ class Token(object): def __repr__(self): return "<%s: %s>" % (type(self).__name__, tuple(self)) - # Backward compatibility + # Backward compatibility py2 + def __unicode__(self): + return unicode(self.token) + + # Backward compatibility py3 def __str__(self): - return str(self.token) + return unicode(self.token) # Backward compatibility def __getitem__(self, key): From e204c27ecc0d2baec5e5bc7462056f8ff28d76bc Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Sat, 7 Dec 2013 22:53:01 +0100 Subject: [PATCH 6/8] * imported u for python 3.2 --- jedi/parser/token.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jedi/parser/token.py b/jedi/parser/token.py index 6c025a80..8e02cc18 100644 --- a/jedi/parser/token.py +++ b/jedi/parser/token.py @@ -6,7 +6,7 @@ tokenizer returns. Therefore we need a memory efficient class. We found that a flat object with slots is the best. """ -from jedi._compatibility import unicode +from jedi._compatibility import unicode, u class Token(object): From 13680945d634db5fe3cb3bbf7cd1ed5c30a9d027 Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Sat, 7 Dec 2013 23:00:47 +0100 Subject: [PATCH 7/8] * imported utf8 for python 3.2 --- jedi/parser/token.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jedi/parser/token.py b/jedi/parser/token.py index 8e02cc18..2c0a9dd1 100644 --- a/jedi/parser/token.py +++ b/jedi/parser/token.py @@ -6,7 +6,7 @@ tokenizer returns. Therefore we need a memory efficient class. We found that a flat object with slots is the best. """ -from jedi._compatibility import unicode, u +from jedi._compatibility import utf8, unicode class Token(object): @@ -43,7 +43,7 @@ class Token(object): 4 >>> Token.from_tuple((6, 5, (4, 3))) - >>> unicode(Token(1, u"😷", 1 ,1)) + "p" == u"😷p" + >>> unicode(Token(1, utf8("😷"), 1 ,1)) + "p" == utf8("😷p") True """ __slots__ = [ From 3204a39f6c4762a26be45ca7b8c0a66373bb70e6 Mon Sep 17 00:00:00 2001 From: Jean-Louis Fuchs Date: Sun, 8 Dec 2013 01:32:58 +0100 Subject: [PATCH 8/8] * made Token readonly like a tuple by using @property * end_pos calculation didn't respect multiline tokens * replaced all index access to Token * wrapped all code that injects token tuples with Token.from_tuple() * repr of Token is still its tuple form!? 
* PEP8 where I read or wrote code --- jedi/interpret.py | 9 ++-- jedi/parser/__init__.py | 8 ++-- jedi/parser/representation.py | 66 ++++++++++++++++++--------- jedi/parser/token.py | 84 +++++++++++++++++------------------ test/test_token.py | 25 +++++++++++ 5 files changed, 122 insertions(+), 70 deletions(-) create mode 100644 test/test_token.py diff --git a/jedi/interpret.py b/jedi/interpret.py index dadf1b28..2a7bc85d 100644 --- a/jedi/interpret.py +++ b/jedi/interpret.py @@ -6,6 +6,7 @@ import itertools import tokenize from jedi.parser import representation as pr +from jedi.parser import token class ObjectImporter(object): @@ -156,11 +157,13 @@ class ObjectImporter(object): names=[(rhs, (0, 0))], start_pos=(0, 0), end_pos=(None, None)) - token_list = [lhsname, (tokenize.OP, '=', (0, 0)), rhsname] + token_list = [lhsname, token.Token.from_tuple( + (tokenize.OP, '=', (0, 0)) + ), rhsname] if call: token_list.extend([ - (tokenize.OP, '(', (0, 0)), - (tokenize.OP, ')', (0, 0)), + token.Token.from_tuple((tokenize.OP, '(', (0, 0))), + token.Token.from_tuple((tokenize.OP, ')', (0, 0))), ]) return pr.Statement( module=submodule, diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py index 96256186..3e6c7f9c 100644 --- a/jedi/parser/__init__.py +++ b/jedi/parser/__init__.py @@ -392,18 +392,18 @@ class Parser(object): first_tok = tok_list[0] # docstrings if len(tok_list) == 1 and not isinstance(first_tok, pr.Name) \ - and first_tok[0] == tokenize.STRING: + and first_tok.token_type == tokenize.STRING: # Normal docstring check if self.freshscope and not self.no_docstr: - self._scope.add_docstr(first_tok[1]) + self._scope.add_docstr(first_tok.token) return None, tok # Attribute docstring (PEP 224) support (sphinx uses it, e.g.) # If string literal is being parsed... 
- elif first_tok[0] == tokenize.STRING: + elif first_tok.token_type == tokenize.STRING: with common.ignored(IndexError, AttributeError): # ...then set it as a docstring - self._scope.statements[-1].add_docstr(first_tok[1]) + self._scope.statements[-1].add_docstr(first_tok.token) return None, tok stmt = stmt_class(self.module, tok_list, first_pos, self.end_pos, diff --git a/jedi/parser/representation.py b/jedi/parser/representation.py index e82773ca..c0e931f6 100644 --- a/jedi/parser/representation.py +++ b/jedi/parser/representation.py @@ -898,7 +898,7 @@ class Statement(Simple): c = token_iterator.current[1] arr.end_pos = c.end_pos if isinstance(c, Simple) \ - else (c[2][0], c[2][1] + len(c[1])) + else c.end_pos return arr, break_tok def parse_stmt(token_iterator, maybe_dict=False, added_breaks=(), @@ -920,9 +920,10 @@ class Statement(Simple): # it's not possible to set it earlier tok.parent = self else: - token_type, tok, start_tok_pos = tok_temp - last_end_pos = end_pos - end_pos = start_tok_pos[0], start_tok_pos[1] + len(tok) + tok = tok_temp.token + start_tok_pos = tok_temp.start_pos + last_end_pos = end_pos + end_pos = tok_temp.end_pos if first: first = False start_pos = start_tok_pos @@ -932,8 +933,12 @@ class Statement(Simple): if lambd is not None: token_list.append(lambd) elif tok == 'for': - list_comp, tok = parse_list_comp(token_iterator, - token_list, start_pos, last_end_pos) + list_comp, tok = parse_list_comp( + token_iterator, + token_list, + start_pos, + last_end_pos + ) if list_comp is not None: token_list = [list_comp] @@ -944,9 +949,12 @@ class Statement(Simple): if level == 0 and tok in closing_brackets \ or tok in added_breaks \ - or level == 1 and (tok == ',' - or maybe_dict and tok == ':' - or is_assignment(tok) and break_on_assignment): + or level == 1 and ( + tok == ',' + or maybe_dict and tok == ':' + or is_assignment(tok) + and break_on_assignment + ): end_pos = end_pos[0], end_pos[1] - 1 break token_list.append(tok_temp) @@ -954,8 +962,14 @@ class Statement(Simple): if not token_list: return None, tok - statement = stmt_class(self._sub_module, token_list, - start_pos, end_pos, self.parent, set_name_parents=False) + statement = stmt_class( + self._sub_module, + token_list, + start_pos, + end_pos, + self.parent, + set_name_parents=False + ) return statement, tok def parse_lambda(token_iterator): @@ -984,8 +998,9 @@ class Statement(Simple): return lambd, tok def parse_list_comp(token_iterator, token_list, start_pos, end_pos): - def parse_stmt_or_arr(token_iterator, added_breaks=(), - names_are_set_vars=False): + def parse_stmt_or_arr( + token_iterator, added_breaks=(), names_are_set_vars=False + ): stmt, tok = parse_stmt(token_iterator, added_breaks=added_breaks) if not stmt: @@ -1039,12 +1054,16 @@ class Statement(Simple): start_pos = tok.start_pos end_pos = tok.end_pos else: - token_type, tok, start_pos = tok_temp - end_pos = start_pos[0], start_pos[1] + len(tok) + token_type = tok_temp.token_type + tok = tok_temp.token + start_pos = tok_temp.start_pos + end_pos = tok_temp.end_pos if is_assignment(tok): # This means, there is an assignment here. 
# Add assignments, which can be more than one - self._assignment_details.append((result, tok)) + self._assignment_details.append( + (result, tok_temp.token) + ) result = [] is_chain = False continue @@ -1072,8 +1091,9 @@ class Statement(Simple): result.append(call) is_chain = False elif tok in brackets.keys(): - arr, is_ass = parse_array(token_iterator, brackets[tok], - start_pos) + arr, is_ass = parse_array( + token_iterator, brackets[tok], start_pos + ) if result and isinstance(result[-1], StatementElement): result[-1].set_execution(arr) else: @@ -1098,8 +1118,14 @@ class Statement(Simple): e = (t[2][0], t[2][1] + len(t[1])) \ if isinstance(t, tuple) else t.start_pos - stmt = Statement(self._sub_module, result, - start_pos, e, self.parent, set_name_parents=False) + stmt = Statement( + self._sub_module, + result, + start_pos, + e, + self.parent, + set_name_parents=False + ) stmt._commands = result arr, break_tok = parse_array(token_iterator, Array.TUPLE, stmt.start_pos, stmt) diff --git a/jedi/parser/token.py b/jedi/parser/token.py index 2c0a9dd1..40e60eee 100644 --- a/jedi/parser/token.py +++ b/jedi/parser/token.py @@ -27,18 +27,10 @@ class Token(object): >>> a.__setstate__((1, 2, 3, 4)) >>> a - >>> a[2] = (2, 1) - >>> a - - >>> a.start_pos - (2, 1) - >>> a.token - 2 - >>> a.start_pos = (3, 4) - >>> a - >>> a.start_pos (3, 4) + >>> a.token + 2 >>> a.start_pos_col 4 >>> Token.from_tuple((6, 5, (4, 3))) @@ -47,7 +39,7 @@ class Token(object): True """ __slots__ = [ - "token_type", "token", "start_pos_line", "start_pos_col" + "_token_type", "_token", "_start_pos_line", "_start_pos_col" ] @classmethod @@ -57,10 +49,10 @@ class Token(object): def __init__( self, token_type, token, start_pos_line, start_pos_col ): - self.token_type = token_type - self.token = token - self.start_pos_line = start_pos_line - self.start_pos_col = start_pos_col + self._token_type = token_type + self._token = token + self._start_pos_line = start_pos_line + self._start_pos_col = start_pos_col def __repr__(self): return "<%s: %s>" % (type(self).__name__, tuple(self)) @@ -85,34 +77,40 @@ class Token(object): else: raise IndexError("list index out of range") - # Backward compatibility - def __setitem__(self, key, value): - # setitem analogous to getitem - if key == 0: - self.token_type = value - elif key == 1: - self.token = value - elif key == 2: - self.start_pos_line = value[0] - self.start_pos_col = value[1] - else: - raise IndexError("list index out of range") + @property + def token_type(self): + return self._token_type + + @property + def token(self): + return self._token + + @property + def start_pos_line(self): + return self._start_pos_line + + @property + def start_pos_col(self): + return self._start_pos_col # Backward compatibility - def __getattr__(self, attr): - # Expose the missing start_pos attribute - if attr == "start_pos": - return (self.start_pos_line, self.start_pos_col) - else: - return object.__getattr__(self, attr) + @property + def start_pos(self): + return (self.start_pos_line, self.start_pos_col) - def __setattr__(self, attr, value): - # setattr analogous to getattr for symmetry - if attr == "start_pos": - self.start_pos_line = value[0] - self.start_pos_col = value[1] + @property + def end_pos(self): + """Returns end position respecting multiline tokens.""" + end_pos_line = self.start_pos_line + lines = unicode(self).split('\n') + end_pos_line += len(lines) - 1 + end_pos_col = self.start_pos_col + # Check for multiline token + if self.start_pos_line == end_pos_line: + end_pos_col += 
len(lines[-1]) else: - object.__setattr__(self, attr, value) + end_pos_col = len(lines[-1]) + return (end_pos_line, end_pos_col) # Make cache footprint smaller for faster unpickling def __getstate__(self): @@ -124,7 +122,7 @@ class Token(object): ) def __setstate__(self, state): - self.token_type = state[0] - self.token = state[1] - self.start_pos_line = state[2] - self.start_pos_col = state[3] + self._token_type = state[0] + self._token = state[1] + self._start_pos_line = state[2] + self._start_pos_col = state[3] diff --git a/test/test_token.py b/test/test_token.py new file mode 100644 index 00000000..9b7b1a3f --- /dev/null +++ b/test/test_token.py @@ -0,0 +1,25 @@ +import jedi.parser as parser + +try: + import unittest2 as unittest +except ImportError: # pragma: no cover + import unittest + + +class TokenTest(unittest.TestCase): + def test_end_pos_one_line(self): + parsed = parser.Parser(''' +def testit(): + a = "huhu" +''') + tok = parsed.top_module.subscopes[0].statements[0].token_list[2] + self.assertEqual(tok.end_pos, (3, 14)) + + def test_end_pos_multi_line(self): + parsed = parser.Parser(''' +def testit(): + a = """huhu +asdfasdf""" + "h" +''') + tok = parsed.top_module.subscopes[0].statements[0].token_list[2] + self.assertEqual(tok.end_pos, (4, 11))
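
For reference, the short sketch below is not part of any patch in this series; it is an illustrative usage example of the Token API the commits introduce, assuming the patched jedi tree (jedi/parser/token.py as of PATCH 8/8) is importable. The start and end positions mirror the multiline-string case exercised in test/test_token.py.

import tokenize

from jedi.parser.token import Token

# Build a Token from the (type, string, (line, col)) tuple shape the
# tokenizer produces; from_tuple() is what the parser call sites use.
tok = Token.from_tuple((tokenize.STRING, '"""huhu\nasdfasdf"""', (3, 8)))

# Tuple-style access is kept for backward compatibility ...
assert tok[0] == tokenize.STRING
assert tok[2] == (3, 8)
assert tuple(tok) == (tokenize.STRING, '"""huhu\nasdfasdf"""', (3, 8))

# ... while the named attributes and derived positions are new.
assert tok.start_pos == (3, 8)
# end_pos respects the newline inside the multiline string: the token
# ends on the following line, at column len('asdfasdf"""') == 11.
assert tok.end_pos == (4, 11)

# __getstate__ keeps the pickled form a flat 4-tuple, matching the
# file-system cache version bump (4 -> 5) in PATCH 1/8.
assert tok.__getstate__() == (tokenize.STRING, '"""huhu\nasdfasdf"""', 3, 8)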