# -*- coding: utf-8 -*-
""" Efficient representation of tokens

We want to have a token_list and start_position for everything the
tokenizer returns. Therefore we need a memory efficient class. We
found that a flat object with slots is the best.
"""
from inspect import cleandoc
from ast import literal_eval

from jedi._compatibility import utf8, unicode


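# Memory note (illustrative, not from the original module): because Token
# defines __slots__, its instances carry no per-instance __dict__, which is
# what keeps a long token list small:
#
#     >>> hasattr(Token(1, 'x', 1, 0), '__dict__')
#     False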
class Token(object):
    """The token object is an efficient representation of the structure
    (token_type, token, (start_pos_line, start_pos_col)). It has indexer
    methods that maintain compatibility to existing code that expects the above
    structure.

    >>> tuple(Token(1, 2, 3, 4))
    (1, 2, (3, 4))
    >>> unicode(Token(1, "test", 1, 1)) == "test"
    True
    >>> repr(Token(1, "test", 1, 1))
    "<Token: (1, 'test', (1, 1))>"
    >>> Token(1, 2, 3, 4).__getstate__()
    (1, 2, 3, 4)
    >>> a = Token(0, 0, 0, 0)
    >>> a.__setstate__((1, 2, 3, 4))
    >>> a
    <Token: (1, 2, (3, 4))>
    >>> a.start_pos
    (3, 4)
    >>> a.token
    2
    >>> a.start_pos_col
    4
    >>> Token.from_tuple((6, 5, (4, 3)))
    <Token: (6, 5, (4, 3))>
    >>> unicode(Token(1, utf8("😷"), 1, 1)) + "p" == utf8("😷p")
    True
    """
    __slots__ = ("_token_type", "_token", "_start_pos_line", "_start_pos_col")

    @classmethod
    def from_tuple(cls, tp):
        return Token(tp[0], tp[1], tp[2][0], tp[2][1])

    def __init__(self, token_type, token, start_pos_line, start_pos_col):
        self._token_type = token_type
        self._token = token
        self._start_pos_line = start_pos_line
        self._start_pos_col = start_pos_col

    def __repr__(self):
        return "<%s: %s>" % (type(self).__name__, tuple(self))

    # Backward compatibility py2
    def __unicode__(self):
        return self.as_string()

    # Backward compatibility py3
    def __str__(self):
        return self.as_string()

    def as_string(self):
        """For backward compatibility str(token) or unicode(token) will work,
        BUT please use as_string() instead, because it is independent of the
        Python version."""
        return unicode(self.token)

    # Backward compatibility
    def __getitem__(self, key):
        # Build the same structure the plain tuple used to have
        if key == 0:
            return self.token_type
        elif key == 1:
            return self.token
        elif key == 2:
            return (self.start_pos_line, self.start_pos_col)
        else:
            raise IndexError("list index out of range")

    @property
    def token_type(self):
        return self._token_type

    @property
    def token(self):
        return self._token

    @property
    def start_pos_line(self):
        return self._start_pos_line

    @property
    def start_pos_col(self):
        return self._start_pos_col

    @property
    def start_pos(self):
        return (self.start_pos_line, self.start_pos_col)

    @property
    def end_pos(self):
        """Returns end position respecting multiline tokens."""
        end_pos_line = self.start_pos_line
        lines = unicode(self).split('\n')
        end_pos_line += len(lines) - 1
        end_pos_col = self.start_pos_col
        # Check for multiline token
        if self.start_pos_line == end_pos_line:
            end_pos_col += len(lines[-1])
        else:
            end_pos_col = len(lines[-1])
        return (end_pos_line, end_pos_col)
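
    # Worked examples for end_pos (illustrative, not from the original module):
    #   Token(1, 'test', 1, 0).end_pos        -> (1, 4)  single line: the column
    #                                                     advances by the length
    #   Token(3, '"""x\ny"""', 4, 8).end_pos  -> (5, 4)  multiline: the column
    #                                                     restarts after newlines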

    # Make cache footprint smaller for faster unpickling
    def __getstate__(self):
        return (
            self.token_type,
            self.token,
            self.start_pos_line,
            self.start_pos_col,
        )

    def __setstate__(self, state):
        self._token_type = state[0]
        self._token = state[1]
        self._start_pos_line = state[2]
        self._start_pos_col = state[3]
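
    # Illustrative pickle round trip through the flat state tuple above
    # (assumed usage, not from the original module):
    #
    #     >>> import pickle
    #     >>> pickle.loads(pickle.dumps(Token(1, 'test', 1, 0)))
    #     <Token: (1, 'test', (1, 0))>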


class TokenNoCompat(Token):
    __slots__ = ()

    def __unicode__(self):
        raise NotImplementedError("Compatibility only for basic token.")

    def __str__(self):
        raise NotImplementedError("Compatibility only for basic token.")

    def __getitem__(self, key):
        raise NotImplementedError("Compatibility only for basic token.")


class TokenDocstring(TokenNoCompat):
    """A string token that is a docstring.

    as_string() will clean the token representing the docstring.
    """
    __slots__ = ()

    def __init__(self, token=None, state=None):
        if token:
            self.__setstate__(token.__getstate__())
        else:
            self.__setstate__(state)

    @classmethod
    def fake_docstring(cls, docstr):
        # TODO: fixme when tests are up again
        return TokenDocstring(state=(0, '"""\n%s\n"""' % docstr, 0, 0))

    def as_string(self):
        """Returns a literal cleaned version of the token"""
        return cleandoc(literal_eval(self.token))
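

# Minimal usage sketch (illustrative; assumes the classes above behave as
# their doctests describe). Run this module directly to try it.
if __name__ == '__main__':
    tok = Token(1, 'test', 1, 0)
    assert tuple(tok) == (1, 'test', (1, 0))   # tuple-style backward compatibility
    assert tok.end_pos == (1, 4)               # single-line token: column advances

    doc = TokenDocstring.fake_docstring('Indented\n    docstring.')
    # literal_eval() unquotes the raw token, cleandoc() strips the margin.
    assert doc.as_string() == 'Indented\n    docstring.'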