# -*- coding: utf-8 -*- """ Efficient representation of tokens We want to have a token_list and start_position for everything the tokenizer returns. Therefore we need a memory efficient class. We found that a flat object with slots is the best. """ from inspect import cleandoc from ast import literal_eval from jedi._compatibility import utf8, unicode class Token(object): """The token object is an efficient representation of the structure (token_type, token, (start_pos_line, start_pos_col)). It has indexer methods that maintain compatibility to existing code that expects the above structure. >>> tuple(Token(1,2,3,4)) (1, 2, (3, 4)) >>> unicode(Token(1, "test", 1, 1)) == "test" True >>> repr(Token(1, "test", 1, 1)) "" >>> Token(1, 2, 3, 4).__getstate__() (1, 2, 3, 4) >>> a = Token(0, 0, 0, 0) >>> a.__setstate__((1, 2, 3, 4)) >>> a >>> a.start_pos (3, 4) >>> a.token 2 >>> a.start_pos_col 4 >>> Token.from_tuple((6, 5, (4, 3))) >>> unicode(Token(1, utf8("😷"), 1 ,1)) + "p" == utf8("😷p") True """ __slots__ = [ "_token_type", "_token", "_start_pos_line", "_start_pos_col" ] @classmethod def from_tuple(cls, tp): return Token(tp[0], tp[1], tp[2][0], tp[2][1]) def __init__( self, token_type, token, start_pos_line, start_pos_col ): self._token_type = token_type self._token = token self._start_pos_line = start_pos_line self._start_pos_col = start_pos_col def __repr__(self): return "<%s: %s>" % (type(self).__name__, tuple(self)) # Backward compatibility py2 def __unicode__(self): return self.as_string() # Backward compatibility py3 def __str__(self): return self.as_string() def as_string(self): """For backward compatibilty str(token) or unicode(token) will work. BUT please use as_string() instead, because it is independant from the python version.""" return unicode(self.token) # Backward compatibility def __getitem__(self, key): # Builds the same structure as tuple used to have if key == 0: return self.token_type elif key == 1: return self.token elif key == 2: return (self.start_pos_line, self.start_pos_col) else: raise IndexError("list index out of range") @property def token_type(self): return self._token_type @property def token(self): return self._token @property def start_pos_line(self): return self._start_pos_line @property def start_pos_col(self): return self._start_pos_col @property def start_pos(self): return (self.start_pos_line, self.start_pos_col) @property def end_pos(self): """Returns end position respecting multiline tokens.""" end_pos_line = self.start_pos_line lines = unicode(self).split('\n') end_pos_line += len(lines) - 1 end_pos_col = self.start_pos_col # Check for multiline token if self.start_pos_line == end_pos_line: end_pos_col += len(lines[-1]) else: end_pos_col = len(lines[-1]) return (end_pos_line, end_pos_col) # Make cache footprint smaller for faster unpickling def __getstate__(self): return ( self.token_type, self.token, self.start_pos_line, self.start_pos_col, ) def __setstate__(self, state): self._token_type = state[0] self._token = state[1] self._start_pos_line = state[2] self._start_pos_col = state[3] class TokenNoCompat(Token): def __unicode__(self): raise NotImplementedError("Compatibility only for basic token.") def __str__(self): raise NotImplementedError("Compatibility only for basic token.") def __getitem__(self, key): raise NotImplementedError("Compatibility only for basic token.") class TokenDocstring(TokenNoCompat): """A string token that is a docstring. as_string() will clean the token representing the docstring. """ def __init__(self, token=None, state=None): if token: self.__setstate__(token.__getstate__()) else: self.__setstate__(state) @classmethod def fake_docstring(cls, docstr): # TODO: fixme when tests are up again return TokenDocstring(state=(0, '"""\n%s\n"""' % docstr, 0, 0)) def as_string(self): """Returns a literal cleaned version of the token""" return cleandoc(literal_eval(self.token))